www.infradead.org Git - users/dwmw2/linux.git/commitdiff
drm/ttm: Schedule delayed_delete worker closer
author: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Sat, 11 Nov 2023 13:08:56 +0000 (08:08 -0500)
committer: Christian König <christian.koenig@amd.com>
Mon, 27 Nov 2023 09:58:36 +0000 (10:58 +0100)
Try to allocate system memory on the NUMA node the device is closest to
and try to run delayed_delete workers on a CPU of this node as well.

To optimize the memory clearing operation when a TTM BO gets freed by
the delayed_delete worker, scheduling it closer to a NUMA node where the
memory was initially allocated helps avoid the cases where the worker
gets randomly scheduled on the CPU cores that are across interconnect
boundaries such as xGMI, PCIe etc.

This change helps USWC GTT allocations on NUMA systems (dGPU) and AMD
APU platforms such as GFXIP9.4.3.

Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231111130856.1168304-1-rajneesh.bhardwaj@amd.com
Signed-off-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_device.c

index e58b7e2498166af73a105f234ba4f9ec0fcc2195..edf10618fe2b22f70d88d18548bb0475643afee2 100644 (file)
@@ -370,7 +370,13 @@ static void ttm_bo_release(struct kref *kref)
                        spin_unlock(&bo->bdev->lru_lock);
 
                        INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
-                       queue_work(bdev->wq, &bo->delayed_delete);
+
+                       /* Schedule the worker on the closest NUMA node. This
+                        * improves performance since system memory might be
+                        * cleared on free and that is best done on a CPU core
+                        * close to it.
+                        */
+                       queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);
                        return;
                }
 
index d48b39132b32427e089c01c272d237ec90f3c40b..f5187b384ae9ac8eedede8e6a0d4d56eb8af1670 100644 (file)
@@ -204,7 +204,8 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
        if (ret)
                return ret;
 
-       bdev->wq = alloc_workqueue("ttm", WQ_MEM_RECLAIM | WQ_HIGHPRI, 16);
+       bdev->wq = alloc_workqueue("ttm",
+                                  WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16);
        if (!bdev->wq) {
                ttm_global_release();
                return -ENOMEM;
@@ -213,7 +214,8 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
        bdev->funcs = funcs;
 
        ttm_sys_man_init(bdev);
-       ttm_pool_init(&bdev->pool, dev, NUMA_NO_NODE, use_dma_alloc, use_dma32);
+
+       ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32);
 
        bdev->vma_manager = vma_manager;
        spin_lock_init(&bdev->lru_lock);