*             CP0_Cause.DC bit or the count_ctl.DC bit.
  *             0 otherwise (in which case CP0_Count timer is running).
  */
-static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
+int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
 
  *
  * Returns:    The ktime at the point of freeze.
  */
-static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
+ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
 {
        ktime_t now;
 
        hrtimer_start(&vcpu->arch.comparecount_timer, expire, HRTIMER_MODE_ABS);
 }
 
+/**
+ * kvm_mips_restore_hrtimer() - Restore hrtimer after a gap, updating expiry.
+ * @vcpu:      Virtual CPU.
+ * @before:    Time before Count was saved, lower bound of drift calculation.
+ * @count:     CP0_Count at point of restore.
+ * @min_drift: Minimum amount of drift permitted before correction.
+ *             Must be <= 0.
+ *
+ * Restores the timer from a particular @count, accounting for drift. This can
+ * be used in conjunction with kvm_mips_freeze_timer() when a hardware timer is
+ * to be used for a period of time, but the exact ktime corresponding to the
+ * final Count that must be restored is not known.
+ *
+ * It is guaranteed that a timer interrupt immediately after restore will be
+ * handled, but not if CP0_Compare is exactly at @count. That case should
+ * already be handled when the hardware timer state is saved.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is not
+ * stopped).
+ *
+ * Returns:    Amount of correction to count_bias due to drift.
+ */
+int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before,
+                            u32 count, int min_drift)
+{
+       ktime_t now, count_time;
+       u32 now_count, before_count;
+       u64 delta;
+       int drift, ret = 0;
+
+       /* Calculate expected count at before */
+       before_count = vcpu->arch.count_bias +
+                       kvm_mips_ktime_to_count(vcpu, before);
+
+       /*
+        * Detect significantly negative drift, where count is lower than
+        * expected. Some negative drift is expected when hardware counter is
+        * set after kvm_mips_freeze_timer(), and it is harmless to allow the
+        * time to jump forwards a little, within reason. If the drift is too
+        * significant, adjust the bias to avoid a big Guest.CP0_Count jump.
+        */
+       drift = count - before_count;
+       if (drift < min_drift) {
+               count_time = before;
+               vcpu->arch.count_bias += drift;
+               ret = drift;
+               goto resume;
+       }
+
+       /* Calculate expected count right now */
+       now = ktime_get();
+       now_count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+
+       /*
+        * Detect positive drift, where count is higher than expected, and
+        * adjust the bias to avoid guest time going backwards.
+        */
+       drift = count - now_count;
+       if (drift > 0) {
+               count_time = now;
+               vcpu->arch.count_bias += drift;
+               ret = drift;
+               goto resume;
+       }
+
+       /* Subtract nanosecond delta to find ktime when count was read */
+       delta = (u64)(u32)(now_count - count);
+       delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz);
+       count_time = ktime_sub_ns(now, delta);
+
+resume:
+       /* Resume using the calculated ktime */
+       kvm_mips_resume_hrtimer(vcpu, count_time, count);
+       return ret;
+}
+
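
For reference, the final branch above inverts kvm_mips_ktime_to_count(): a delta of d counter ticks at count_hz Hz spans d * NSEC_PER_SEC / count_hz nanoseconds. A minimal user-space sketch of just that arithmetic (the 100 MHz rate and the sample delta are illustrative assumptions, not values from this patch):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Invert kvm_mips_ktime_to_count(): scale a CP0_Count delta back to ns. */
static uint64_t count_delta_to_ns(uint32_t delta, uint32_t count_hz)
{
        return (uint64_t)delta * NSEC_PER_SEC / count_hz;
}

int main(void)
{
        /* At an assumed 100 MHz counter, 1000 ticks span 10000 ns. */
        printf("%llu ns\n",
               (unsigned long long)count_delta_to_ns(1000, 100000000));
        return 0;
}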
 /**
  * kvm_mips_write_count() - Modify the count and update timer.
  * @vcpu:      Virtual CPU.
        ++vcpu->stat.wait_exits;
        trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT);
        if (!vcpu->arch.pending_exceptions) {
+               kvm_vz_lose_htimer(vcpu);
                vcpu->arch.wait = 1;
                kvm_vcpu_block(vcpu);
 
 
  * VZ guest timer handling.
  */
 
+/**
+ * kvm_vz_should_use_htimer() - Find whether to use the VZ hard guest timer.
+ * @vcpu:      Virtual CPU.
+ *
+ * Returns:    true if the VZ GTOffset & real guest CP0_Count should be used
+ *             instead of software emulation of guest timer.
+ *             false otherwise.
+ */
+static bool kvm_vz_should_use_htimer(struct kvm_vcpu *vcpu)
+{
+       if (kvm_mips_count_disabled(vcpu))
+               return false;
+
+       /* Chosen frequency must match real frequency */
+       if (mips_hpt_frequency != vcpu->arch.count_hz)
+               return false;
+
+       /* We don't support a CP0_GTOffset with fewer bits than CP0_Count */
+       if (current_cpu_data.gtoffset_mask != 0xffffffff)
+               return false;
+
+       return true;
+}
+
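For reference, the VZ hardware derives Guest.CP0_Count = Root.CP0_Count + CP0_GTOffset (modulo 2^32), which is how _kvm_vz_restore_htimer() below positions the guest count. Since the offset is purely additive it cannot compensate for a frequency mismatch, and a CP0_GTOffset implementing fewer than 32 bits could not represent an arbitrary count bias; hence the two checks above.
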
 /**
  * _kvm_vz_restore_stimer() - Restore soft timer state.
  * @vcpu:      Virtual CPU.
  * @compare:   CP0_Compare register value, restored by caller.
  * @cause:     CP0_Cause register to restore.
  *
- * Restore VZ state relating to the soft timer.
+ * Restore VZ state relating to the soft timer. The hard timer can be enabled
+ * later.
  */
 static void _kvm_vz_restore_stimer(struct kvm_vcpu *vcpu, u32 compare,
                                   u32 cause)
 }
 
 /**
- * kvm_vz_restore_timer() - Restore guest timer state.
+ * _kvm_vz_restore_htimer() - Restore hard timer state.
+ * @vcpu:      Virtual CPU.
+ * @compare:   CP0_Compare register value, restored by caller.
+ * @cause:     CP0_Cause register to restore.
+ *
+ * Restore hard timer Guest.Count & Guest.Cause taking care to preserve the
+ * value of Guest.CP0_Cause.TI while restoring Guest.CP0_Cause.
+ */
+static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
+                                  u32 compare, u32 cause)
+{
+       u32 start_count, after_count;
+       ktime_t freeze_time;
+       unsigned long flags;
+
+       /*
+        * Freeze the soft-timer and sync the guest CP0_Count with it. We do
+        * this with interrupts disabled to avoid latency.
+        */
+       local_irq_save(flags);
+       freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count);
+       write_c0_gtoffset(start_count - read_c0_count());
+       local_irq_restore(flags);
+
+       /* restore guest CP0_Cause, as TI may already be set */
+       back_to_back_c0_hazard();
+       write_gc0_cause(cause);
+
+       /*
+        * The above sequence isn't atomic and would result in lost timer
+        * interrupts if we're not careful. Detect if a timer interrupt is due
+        * and assert it.
+        */
+       back_to_back_c0_hazard();
+       after_count = read_gc0_count();
+       if (after_count - start_count > compare - start_count - 1)
+               kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
+}
+
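The final comparison above is a wrap-safe encoding of "CP0_Compare lies in (start_count, after_count] modulo 2^32". A standalone sketch of the same predicate (names are hypothetical):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Wrap-safe test for "compare lies in (start, end] modulo 2^32". */
static bool timer_irq_due(uint32_t start, uint32_t end, uint32_t compare)
{
        return end - start > compare - start - 1;
}

int main(void)
{
        assert(timer_irq_due(10, 20, 15));        /* fires inside window    */
        assert(!timer_irq_due(10, 20, 25));       /* fires after window     */
        assert(timer_irq_due(0xfffffff0, 8, 2));  /* window wraps past 2^32 */
        assert(!timer_irq_due(10, 20, 10));       /* exactly at start       */
        return 0;
}

Note that compare == start is deliberately never treated as due (compare - start - 1 wraps to 0xffffffff), matching the kvm_mips_restore_hrtimer() comment that a CP0_Compare exactly at the restored count must be handled when the hardware timer state is saved.
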
+/**
+ * kvm_vz_restore_timer() - Restore timer state.
  * @vcpu:      Virtual CPU.
  *
  * Restore soft timer state from saved context.
        _kvm_vz_restore_stimer(vcpu, compare, cause);
 }
 
+/**
+ * kvm_vz_acquire_htimer() - Switch to hard timer state.
+ * @vcpu:      Virtual CPU.
+ *
+ * Restore hard timer state on top of existing soft timer state if possible.
+ *
+ * Since the hard timer won't remain active across preemption, the caller
+ * should disable preemption.
+ */
+void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu)
+{
+       u32 gctl0;
+
+       gctl0 = read_c0_guestctl0();
+       if (!(gctl0 & MIPS_GCTL0_GT) && kvm_vz_should_use_htimer(vcpu)) {
+               /* enable guest access to hard timer */
+               write_c0_guestctl0(gctl0 | MIPS_GCTL0_GT);
+
+               _kvm_vz_restore_htimer(vcpu, read_gc0_compare(),
+                                      read_gc0_cause());
+       }
+}
+
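As a usage sketch (hypothetical caller; in this patch the real call site is the vcpu_run path at the end of the diff, which runs with preemption disabled around guest entry):

        preempt_disable();
        kvm_vz_acquire_htimer(vcpu);
        /* ... enter guest; the hard timer stays valid until preemption ... */
        preempt_enable();
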
+/**
+ * _kvm_vz_save_htimer() - Switch to software emulation of guest timer.
+ * @vcpu:      Virtual CPU.
+ * @compare:   Pointer to write compare value to.
+ * @cause:     Pointer to write cause value to.
+ *
+ * Save VZ guest timer state and switch to software emulation of guest CP0
+ * timer. The hard timer must already be in use, so preemption should be
+ * disabled.
+ */
+static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu,
+                               u32 *out_compare, u32 *out_cause)
+{
+       u32 cause, compare, before_count, end_count;
+       ktime_t before_time;
+
+       compare = read_gc0_compare();
+       *out_compare = compare;
+
+       before_time = ktime_get();
+
+       /*
+        * Record the CP0_Count *prior* to saving CP0_Cause, so we have a time
+        * at which no pending timer interrupt is missing.
+        */
+       before_count = read_gc0_count();
+       back_to_back_c0_hazard();
+       cause = read_gc0_cause();
+       *out_cause = cause;
+
+       /*
+        * Record a final CP0_Count which we will transfer to the soft-timer.
+        * This is recorded *after* saving CP0_Cause, so we don't get any timer
+        * interrupts from just after the final CP0_Count point.
+        */
+       back_to_back_c0_hazard();
+       end_count = read_gc0_count();
+
+       /*
+        * The above sequence isn't atomic, so we could miss a timer interrupt
+        * between reading CP0_Cause and end_count. Detect and record any timer
+        * interrupt due between before_count and end_count.
+        */
+       if (end_count - before_count > compare - before_count - 1)
+               kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
+
+       /*
+        * Restore soft-timer, ignoring a small amount of negative drift due to
+        * delay between freeze_hrtimer and setting CP0_GTOffset.
+        */
+       kvm_mips_restore_hrtimer(vcpu, before_time, end_count, -0x10000);
+}
+
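To put the -0x10000 bound in perspective: at an assumed 100 MHz count_hz, 0x10000 ticks correspond to 65536 / 100000000 s, roughly 655 us, so only a backwards jump larger than that (relative to the pre-save timestamp) triggers a count_bias correction.
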
 /**
  * kvm_vz_save_timer() - Save guest timer state.
  * @vcpu:      Virtual CPU.
  *
- * Save VZ guest timer state.
+ * Save VZ guest timer state and switch to soft guest timer if hard timer was in
+ * use.
  */
 static void kvm_vz_save_timer(struct kvm_vcpu *vcpu)
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
-       u32 compare, cause;
+       u32 gctl0, compare, cause;
 
-       compare = read_gc0_compare();
-       cause = read_gc0_cause();
+       gctl0 = read_c0_guestctl0();
+       if (gctl0 & MIPS_GCTL0_GT) {
+               /* disable guest use of hard timer */
+               write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT);
+
+               /* save hard timer state */
+               _kvm_vz_save_htimer(vcpu, &compare, &cause);
+       } else {
+               compare = read_gc0_compare();
+               cause = read_gc0_cause();
+       }
 
        /* save timer-related state to VCPU context */
        kvm_write_sw_gc0_cause(cop0, cause);
        kvm_write_sw_gc0_compare(cop0, compare);
 }
 
+/**
+ * kvm_vz_lose_htimer() - Ensure hard guest timer is not in use.
+ * @vcpu:      Virtual CPU.
+ *
+ * Transfers the state of the hard guest timer to the soft guest timer, leaving
+ * guest state intact so it can continue to be used with the soft timer.
+ */
+void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu)
+{
+       u32 gctl0, compare, cause;
+
+       preempt_disable();
+       gctl0 = read_c0_guestctl0();
+       if (gctl0 & MIPS_GCTL0_GT) {
+               /* disable guest use of timer */
+               write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT);
+
+               /* switch to soft timer */
+               _kvm_vz_save_htimer(vcpu, &compare, &cause);
+
+               /* leave soft timer in usable state */
+               _kvm_vz_restore_stimer(vcpu, compare, cause);
+       }
+       preempt_enable();
+}
+
 /**
  * is_eva_access() - Find whether an instruction is an EVA memory accessor.
  * @inst:      32-bit instruction encoding.
 
                        if (rd == MIPS_CP0_COUNT &&
                            sel == 0) {                 /* Count */
+                               kvm_vz_lose_htimer(vcpu);
                                kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]);
                        } else if (rd == MIPS_CP0_COMPARE &&
                                   sel == 0) {          /* Compare */
 
                        /* DC bit enabling/disabling timer? */
                        if (change & CAUSEF_DC) {
-                               if (val & CAUSEF_DC)
+                               if (val & CAUSEF_DC) {
+                                       kvm_vz_lose_htimer(vcpu);
                                        kvm_mips_count_disable_cause(vcpu);
-                               else
+                               } else {
                                        kvm_mips_count_enable_cause(vcpu);
+                               }
                        }
 
                        /* Only certain bits are RW to the guest */
        int cpu = smp_processor_id();
        int r;
 
+       kvm_vz_acquire_htimer(vcpu);
        /* Check if we have any exceptions/interrupts pending */
        kvm_mips_deliver_interrupts(vcpu, read_gc0_cause());