KVM: x86/xen: avoid blocking in hardirq context in kvm_xen_set_evtchn_fast()
author    David Woodhouse <dwmw@amazon.co.uk>
          Thu, 7 Mar 2024 16:43:51 +0000 (16:43 +0000)
committer David Woodhouse <dwmw@amazon.co.uk>
          Fri, 8 Mar 2024 18:09:22 +0000 (18:09 +0000)
As described in [1], compiling with CONFIG_PROVE_RAW_LOCK_NESTING shows that
kvm_xen_set_evtchn_fast() is blocking on pfncache locks in IRQ context.
It only actually blocks with PREEMPT_RT, because the locks will be turned
into mutexes there. There is no 'raw' version of rwlock_t that can be used
to avoid that, so use read_trylock() and treat failure to take the lock
the same as an invalid cache.
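
For illustration only, a minimal sketch of the resulting pattern, modelled
on the set_shinfo_evtchn_pending() hunk below (the function name here is
hypothetical and not part of the patch):

	/* Sketch: deliver via a pfncache without ever blocking in hardirq. */
	static int deliver_via_gpc(struct gfn_to_pfn_cache *gpc)
	{
		int rc = -EWOULDBLOCK;

		if (in_interrupt()) {
			/* Never block in hardirq context: a contended lock
			 * is treated exactly like an invalid cache. */
			if (!read_trylock(&gpc->lock))
				return rc;
		} else {
			read_lock(&gpc->lock);
		}

		if (kvm_gpc_check(gpc, PAGE_SIZE)) {
			/* ... deliver the event via gpc->khva ... */
			rc = 0;
		}

		read_unlock(&gpc->lock);
		return rc;
	}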

However, with PREEMPT_RT read_lock_irqsave() doesn't actually disable IRQs
either. So open-coding a trylock_irqsave() using local_irq_save() would be
wrong in the PREEMPT_RT case.
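
For reference, this is the open-coded "trylock_irqsave" that the
kvm_xen_update_runstate_guest() hunk below removes (all identifiers are
taken from that hunk):

	/* Pattern removed below: local_irq_save() around the trylock is
	 * precisely what would be wrong on PREEMPT_RT. */
	local_irq_save(flags);
	if (!read_trylock(&gpc1->lock)) {
		local_irq_restore(flags);
		return;
	}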

In fact, if kvm_xen_set_evtchn_fast() is now going to use a trylock anyway
when invoked in hardirq context, there's actually no *need* for interrupts
to be disabled by any other code path that takes the lock. So just switch
all other users to a plain read_lock() and then the different semantics
on PREEMPT_RT are not an issue.

Add a warning in __kvm_xen_has_interrupt() just to make sure that it is
never called from interrupt context.

[1] https://lore.kernel.org/lkml/99771ef3a4966a01fefd3adbb2ba9c3a75f97cf2.camel@infradead.org/T/#mbd06e5a04534ce9c0ee94bd8f1e8d942b2d45bd6
Fixes: 77c9b9dea4fb ("KVM: x86/xen: Use fast path for Xen timer delivery")
Fixes: bbe17c625d68 ("KVM: x86/xen: Fix potential deadlock in kvm_xen_update_runstate_guest()")
Co-developed-by: Paul Durrant <pdurrant@amazon.com>
Signed-off-by: Paul Durrant <pdurrant@amazon.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
arch/x86/kvm/xen.c

index f8113eb8d0408af425b515341d74e0c13c8e545e..ab3bbe75602d0a1e4426ca70672a2b1032e6bd0c 100644 (file)
@@ -45,7 +45,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm)
        int ret = 0;
        int idx = srcu_read_lock(&kvm->srcu);
 
-       read_lock_irq(&gpc->lock);
+       read_lock(&gpc->lock);
        while (!kvm_gpc_check(gpc, PAGE_SIZE)) {
                read_unlock_irq(&gpc->lock);
 
@@ -53,7 +53,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm)
                if (ret)
                        goto out;
 
-               read_lock_irq(&gpc->lock);
+               read_lock(&gpc->lock);
        }
 
        /*
@@ -96,7 +96,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm)
        smp_wmb();
 
        wc->version = wc_version + 1;
-       read_unlock_irq(&gpc->lock);
+       read_unlock(&gpc->lock);
 
        kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
 
@@ -277,7 +277,6 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
        struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
        size_t user_len, user_len1, user_len2;
        struct vcpu_runstate_info rs;
-       unsigned long flags;
        size_t times_ofs;
        uint8_t *update_bit = NULL;
        uint64_t entry_time;
@@ -373,16 +372,13 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
         * gfn_to_pfn caches that cover the region.
         */
        if (atomic) {
-               local_irq_save(flags);
-               if (!read_trylock(&gpc1->lock)) {
-                       local_irq_restore(flags);
+               if (!read_trylock(&gpc1->lock))
                        return;
-               }
        } else {
-               read_lock_irqsave(&gpc1->lock, flags);
+               read_lock(&gpc1->lock);
        }
        while (!kvm_gpc_check(gpc1, user_len1)) {
-               read_unlock_irqrestore(&gpc1->lock, flags);
+               read_unlock(&gpc1->lock);
 
                /* When invoked from kvm_sched_out() we cannot sleep */
                if (atomic)
@@ -391,7 +387,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
                if (kvm_gpc_refresh(gpc1, user_len1))
                        return;
 
-               read_lock_irqsave(&gpc1->lock, flags);
+               read_lock(&gpc1->lock);
        }
 
        if (likely(!user_len2)) {
@@ -419,7 +415,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
                lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
                if (atomic) {
                        if (!read_trylock(&gpc2->lock)) {
-                               read_unlock_irqrestore(&gpc1->lock, flags);
+                               read_unlock(&gpc1->lock);
                                return;
                        }
                } else {
@@ -428,7 +424,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 
                if (!kvm_gpc_check(gpc2, user_len2)) {
                        read_unlock(&gpc2->lock);
-                       read_unlock_irqrestore(&gpc1->lock, flags);
+                       read_unlock(&gpc1->lock);
 
                        /* When invoked from kvm_sched_out() we cannot sleep */
                        if (atomic)
@@ -533,7 +529,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
        }
 
        kvm_gpc_mark_dirty_in_slot(gpc1);
-       read_unlock_irqrestore(&gpc1->lock, flags);
+       read_unlock(&gpc1->lock);
 }
 
 void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
@@ -592,7 +588,6 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 {
        unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
        struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
-       unsigned long flags;
 
        if (!evtchn_pending_sel)
                return;
@@ -602,14 +597,14 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
         * does anyway. Page it in and retry the instruction. We're just a
         * little more honest about it.
         */
-       read_lock_irqsave(&gpc->lock, flags);
+       read_lock(&gpc->lock);
        while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
-               read_unlock_irqrestore(&gpc->lock, flags);
+               read_unlock(&gpc->lock);
 
                if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info)))
                        return;
 
-               read_lock_irqsave(&gpc->lock, flags);
+               read_lock(&gpc->lock);
        }
 
        /* Now gpc->khva is a valid kernel address for the vcpu_info */
@@ -639,7 +634,7 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
        }
 
        kvm_gpc_mark_dirty_in_slot(gpc);
-       read_unlock_irqrestore(&gpc->lock, flags);
+       read_unlock(&gpc->lock);
 
        /* For the per-vCPU lapic vector, deliver it as MSI. */
        if (v->arch.xen.upcall_vector)
@@ -649,7 +644,6 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
        struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
-       unsigned long flags;
        u8 rc = 0;
 
        /*
@@ -665,9 +659,10 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
        BUILD_BUG_ON(sizeof(rc) !=
                     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
-       read_lock_irqsave(&gpc->lock, flags);
+       WARN_ON_ONCE(in_interrupt());
+       read_lock(&gpc->lock);
        while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
-               read_unlock_irqrestore(&gpc->lock, flags);
+               read_unlock(&gpc->lock);
 
                /*
                 * This function gets called from kvm_vcpu_block() after setting the
@@ -687,11 +682,11 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
                         */
                        return 0;
                }
-               read_lock_irqsave(&gpc->lock, flags);
+               read_lock(&gpc->lock);
        }
 
        rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
-       read_unlock_irqrestore(&gpc->lock, flags);
+       read_unlock(&gpc->lock);
        return rc;
 }
 
@@ -1382,12 +1377,11 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
        struct kvm *kvm = vcpu->kvm;
        struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
        unsigned long *pending_bits;
-       unsigned long flags;
        bool ret = true;
        int idx, i;
 
        idx = srcu_read_lock(&kvm->srcu);
-       read_lock_irqsave(&gpc->lock, flags);
+       read_lock(&gpc->lock);
        if (!kvm_gpc_check(gpc, PAGE_SIZE))
                goto out_rcu;
 
@@ -1408,7 +1402,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
        }
 
  out_rcu:
-       read_unlock_irqrestore(&gpc->lock, flags);
+       read_unlock(&gpc->lock);
        srcu_read_unlock(&kvm->srcu, idx);
 
        return ret;
@@ -1730,12 +1724,17 @@ static int set_shinfo_evtchn_pending(struct kvm_vcpu *vcpu, u32 port)
        struct kvm *kvm = vcpu->kvm;
        struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
        unsigned long *pending_bits, *mask_bits;
-       unsigned long flags;
        int rc = -EWOULDBLOCK;
 
-       read_lock_irqsave(&gpc->lock, flags);
+       if (in_interrupt()) {
+               if (!read_trylock(&gpc->lock))
+                       goto out;
+       } else {
+               read_lock(&gpc->lock);
+       }
+
        if (!kvm_gpc_check(gpc, PAGE_SIZE))
-               goto out;
+               goto out_unlock;
 
        if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
                struct shared_info *shinfo = gpc->khva;
@@ -1758,8 +1757,9 @@ static int set_shinfo_evtchn_pending(struct kvm_vcpu *vcpu, u32 port)
                rc = 1; /* It is newly raised */
        }
 
+ out_unlock:
+       read_unlock(&gpc->lock);
  out:
-       read_unlock_irqrestore(&gpc->lock, flags);
        return rc;
 }
 
@@ -1767,23 +1767,23 @@ static bool set_vcpu_info_evtchn_pending(struct kvm_vcpu *vcpu, u32 port)
 {
        struct kvm *kvm = vcpu->kvm;
        struct gfn_to_pfn_cache *gpc = &vcpu->arch.xen.vcpu_info_cache;
-       unsigned long flags;
        bool kick_vcpu = false;
+       bool locked;
 
-       read_lock_irqsave(&gpc->lock, flags);
+       locked = read_trylock(&gpc->lock);
 
        /*
         * Try to deliver the event directly to the vcpu_info. If successful and
         * the guest is using upcall_vector delivery, send the MSI.
-        * If the pfncache is invalid, set the shadow. In this case, or if the
-        * guest is using another form of event delivery, the vCPU must be
-        * kicked to complete the delivery.
+        * If the pfncache lock is contended or the cache is invalid, set the
+        * shadow. In this case, or if the guest is using another form of event
+        * delivery, the vCPU must be kicked to complete the delivery.
         */
        if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
                struct vcpu_info *vcpu_info = gpc->khva;
                int port_word_bit = port / 64;
 
-               if (!kvm_gpc_check(gpc, sizeof(*vcpu_info))) {
+               if ((!locked || !kvm_gpc_check(gpc, sizeof(*vcpu_info)))) {
                        if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
                                kick_vcpu = true;
                        goto out;
@@ -1797,7 +1797,7 @@ static bool set_vcpu_info_evtchn_pending(struct kvm_vcpu *vcpu, u32 port)
                struct compat_vcpu_info *vcpu_info = gpc->khva;
                int port_word_bit = port / 32;
 
-               if (!kvm_gpc_check(gpc, sizeof(*vcpu_info))) {
+               if ((!locked || !kvm_gpc_check(gpc, sizeof(*vcpu_info)))) {
                        if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
                                kick_vcpu = true;
                        goto out;
@@ -1816,7 +1816,9 @@ static bool set_vcpu_info_evtchn_pending(struct kvm_vcpu *vcpu, u32 port)
        }
 
  out:
-       read_unlock_irqrestore(&gpc->lock, flags);
+       if (locked)
+               read_unlock(&gpc->lock);
+
        return kick_vcpu;
 }