]> www.infradead.org Git - nvme.git/commitdiff
KVM: Treat the device list as an rculist
authorOliver Upton <oliver.upton@linux.dev>
Mon, 22 Apr 2024 20:01:40 +0000 (20:01 +0000)
committerMarc Zyngier <maz@kernel.org>
Thu, 25 Apr 2024 12:19:55 +0000 (13:19 +0100)
A subsequent change to KVM/arm64 will necessitate walking the device
list outside of the kvm->lock. Prepare by converting to an rculist. This
has zero effect on the VM destruction path, as it is expected every
reader is backed by a reference on the kvm struct.

On the other hand, ensure a given device is completely destroyed before
dropping the kvm->lock in the release() path, as certain devices expect
to be a singleton (e.g. the vfio-kvm device).

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20240422200158.2606761-2-oliver.upton@linux.dev
Signed-off-by: Marc Zyngier <maz@kernel.org>
virt/kvm/kvm_main.c
virt/kvm/vfio.c

index fb49c2a602002ed30a5f426203fa0e30be2436b0..6c09fe40948f864a056552fbecffee97d24e1b29 100644 (file)
@@ -1329,6 +1329,12 @@ static void kvm_destroy_devices(struct kvm *kvm)
         * We do not need to take the kvm->lock here, because nobody else
         * has a reference to the struct kvm at this point and therefore
         * cannot access the devices list anyhow.
+        *
+        * The device list is generally managed as an rculist, but list_del()
+        * is used intentionally here. If a bug in KVM introduced a reader that
+        * was not backed by a reference on the kvm struct, the hope is that
+        * it'd consume the poisoned forward pointer instead of suffering a
+        * use-after-free, even though this cannot be guaranteed.
         */
        list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
                list_del(&dev->vm_node);
@@ -4725,7 +4731,8 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
 
        if (dev->ops->release) {
                mutex_lock(&kvm->lock);
-               list_del(&dev->vm_node);
+               list_del_rcu(&dev->vm_node);
+               synchronize_rcu();
                dev->ops->release(dev);
                mutex_unlock(&kvm->lock);
        }
@@ -4808,7 +4815,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
                kfree(dev);
                return ret;
        }
-       list_add(&dev->vm_node, &kvm->devices);
+       list_add_rcu(&dev->vm_node, &kvm->devices);
        mutex_unlock(&kvm->lock);
 
        if (ops->init)
@@ -4819,7 +4826,8 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
        if (ret < 0) {
                kvm_put_kvm_no_destroy(kvm);
                mutex_lock(&kvm->lock);
-               list_del(&dev->vm_node);
+               list_del_rcu(&dev->vm_node);
+               synchronize_rcu();
                if (ops->release)
                        ops->release(dev);
                mutex_unlock(&kvm->lock);
index ca24ce1209068e0f50577318b84c67a59eaf1ee5..76b7f6085dcd7951482ae3bd0ac69e9a69ad9a20 100644 (file)
@@ -366,6 +366,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
        struct kvm_device *tmp;
        struct kvm_vfio *kv;
 
+       lockdep_assert_held(&dev->kvm->lock);
+
        /* Only one VFIO "device" per VM */
        list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
                if (tmp->ops == &kvm_vfio_ops)