www.infradead.org Git - users/willy/xarray.git/commitdiff
iommufd: Add iommufd_object_tombstone_user() helper
author Xu Yilun <yilun.xu@linux.intel.com>
Wed, 16 Jul 2025 07:03:43 +0000 (15:03 +0800)
committer Jason Gunthorpe <jgg@nvidia.com>
Fri, 18 Jul 2025 20:33:08 +0000 (17:33 -0300)
Add the iommufd_object_tombstone_user() helper, which allows the caller
to destroy an iommufd object created by userspace.

This is useful on some destroy paths when the kernel caller finds that
the object should have been removed by userspace but is still alive.
With this helper, the caller destroys the object but leaves the object
ID reserved (a so-called tombstone). The tombstone prevents the object
ID from being repurposed without the original user being aware of it.

Since this only happens on abnormal userspace behavior, for simplicity
the tombstoned object ID stays reserved (leaked) until
iommufd_fops_release(), i.e. the original user gets an error when
calling ioctl(IOMMU_DESTROY) on that ID.

The first use case would be to ensure the iommufd_vdevice can't outlive
the associated iommufd_device.

Link: https://patch.msgid.link/r/20250716070349.1807226-3-yilun.xu@linux.intel.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Co-developed-by: "Aneesh Kumar K.V (Arm)" <aneesh.kumar@kernel.org>
Signed-off-by: "Aneesh Kumar K.V (Arm)" <aneesh.kumar@kernel.org>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Xu Yilun <yilun.xu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/iommu/iommufd/iommufd_private.h
drivers/iommu/iommufd/main.c

index cd14163abdd158405308df09a59ea500f2196e24..1495450600293305b99b9787aa91d93fc5ad9334 100644 (file)
@@ -202,7 +202,8 @@ void iommufd_object_finalize(struct iommufd_ctx *ictx,
                             struct iommufd_object *obj);
 
 enum {
-       REMOVE_WAIT_SHORTTERM = 1,
+       REMOVE_WAIT_SHORTTERM   = BIT(0),
+       REMOVE_OBJ_TOMBSTONE    = BIT(1),
 };
 int iommufd_object_remove(struct iommufd_ctx *ictx,
                          struct iommufd_object *to_destroy, u32 id,
@@ -228,6 +229,26 @@ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
        WARN_ON(ret);
 }
 
+/*
+ * Similar to iommufd_object_destroy_user(), except that the object ID is left
+ * reserved/tombstoned.
+ *
+ * The object is removed via iommufd_object_remove() with REMOVE_OBJ_TOMBSTONE
+ * set, which stores XA_ZERO_ENTRY in the object's xarray slot instead of NULL.
+ * The ID therefore cannot be repurposed behind the back of the original user.
+ * The tombstone is only cleaned up when the iommufd is closed, see
+ * iommufd_fops_release().
+ *
+ * Meant for kernel destroy paths that find a userspace-created object which
+ * should already have been removed by userspace but is still alive.
+ */
+static inline void iommufd_object_tombstone_user(struct iommufd_ctx *ictx,
+                                                struct iommufd_object *obj)
+{
+       int ret;
+
+       ret = iommufd_object_remove(ictx, obj, obj->id,
+                                   REMOVE_WAIT_SHORTTERM | REMOVE_OBJ_TOMBSTONE);
+
+       /*
+        * If there is a bug and we couldn't destroy the object then we did put
+        * back the caller's users refcount and will eventually try to free it
+        * again during close.
+        *
+        * This helper returns void, so a failure is only reported through the
+        * warning below and is not propagated to the caller.
+        */
+       WARN_ON(ret);
+}
+
 /*
  * The HWPT allocated by autodomains is used in possibly many devices and
  * is automatically destroyed when its refcount reaches zero.
index 69c2195e77cad7e7e3941aa03b75c9e334c7de89..71135f0ec72df7d1e33ebbad0b696a8df8e45990 100644 (file)
@@ -225,7 +225,7 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
                goto err_xa;
        }
 
-       xas_store(&xas, NULL);
+       xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL);
        if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
                ictx->vfio_ioas = NULL;
        xa_unlock(&ictx->objects);
@@ -311,19 +311,41 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
        while (!xa_empty(&ictx->objects)) {
                unsigned int destroyed = 0;
                unsigned long index;
+               bool empty = true;
 
+               /*
+                * We can't use xa_empty() to end the loop, as the tombstones
+                * are stored as XA_ZERO_ENTRY in the xarray and so keep
+                * xa_empty() false. xa_for_each(), however, automatically
+                * converts them to NULL and skips them. Thus, once
+                * xa_for_each() finds no further !NULL entries, the loop is
+                * done.
+                */
                xa_for_each(&ictx->objects, index, obj) {
+                       empty = false;
                        if (!refcount_dec_if_one(&obj->users))
                                continue;
+
                        destroyed++;
                        xa_erase(&ictx->objects, index);
                        iommufd_object_ops[obj->type].destroy(obj);
                        kfree(obj);
                }
+
+               if (empty)
+                       break;
+
                /* Bug related to users refcount */
                if (WARN_ON(!destroyed))
                        break;
        }
+
+       /*
+        * There may be some tombstones left over from
+        * iommufd_object_tombstone_user()
+        */
+       xa_destroy(&ictx->objects);
+
        WARN_ON(!xa_empty(&ictx->groups));
 
        mutex_destroy(&ictx->sw_msi_lock);