From 5823f0453c2a51f9e10fdb90dd9051068fe607a2 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Fri, 10 Jan 2025 14:31:11 +0100 Subject: [PATCH 01/16] dt-bindings: display: mediatek: dpi: add power-domains example DPI is part of the display / multimedia block in MediaTek SoCs, and always has a power-domain (at least in the upstream device-trees). Add the power-domains property to the binding example. Fixes: 9273cf7d3942 ("dt-bindings: display: mediatek: convert the dpi bindings to yaml") Signed-off-by: Fabien Parent Reviewed-by: AngeloGioacchino Del Regno Acked-by: Rob Herring (Arm) Reviewed-by: CK Hu Signed-off-by: Alexandre Mergnat Link: https://patchwork.kernel.org/project/dri-devel/patch/20231023-display-support-v7-1-6703f3e26831@baylibre.com/ Signed-off-by: Chun-Kuang Hu --- .../devicetree/bindings/display/mediatek/mediatek,dpi.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml index 5670715efa5c..b659d79393a8 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml @@ -121,11 +121,13 @@ examples: - | #include #include + #include dpi: dpi@1401d000 { compatible = "mediatek,mt8173-dpi"; reg = <0x1401d000 0x1000>; interrupts = ; + power-domains = <&spm MT8173_POWER_DOMAIN_MM>; clocks = <&mmsys CLK_MM_DPI_PIXEL>, <&mmsys CLK_MM_DPI_ENGINE>, <&apmixedsys CLK_APMIXED_TVDPLL>; -- 2.51.0 From cd5b6ba77705e633955fa38eb6559cc7fe484a3b Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Fri, 10 Jan 2025 14:31:13 +0100 Subject: [PATCH 02/16] drm/mediatek: add MT8365 SoC support Add DRM support for MT8365 SoC. Signed-off-by: Fabien Parent Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Alexandre Mergnat Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20231023-display-support-v7-3-6703f3e26831@baylibre.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 772c3d0f5d14..74158b9d6503 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -327,6 +327,10 @@ static const struct mtk_mmsys_driver_data mt8195_vdosys1_driver_data = { .min_height = 1, }; +static const struct mtk_mmsys_driver_data mt8365_mmsys_driver_data = { + .mmsys_dev_num = 1, +}; + static const struct of_device_id mtk_drm_of_ids[] = { { .compatible = "mediatek,mt2701-mmsys", .data = &mt2701_mmsys_driver_data}, @@ -354,6 +358,8 @@ static const struct of_device_id mtk_drm_of_ids[] = { .data = &mt8195_vdosys0_driver_data}, { .compatible = "mediatek,mt8195-vdosys1", .data = &mt8195_vdosys1_driver_data}, + { .compatible = "mediatek,mt8365-mmsys", + .data = &mt8365_mmsys_driver_data}, { } }; MODULE_DEVICE_TABLE(of, mtk_drm_of_ids); @@ -754,6 +760,8 @@ static const struct of_device_id mtk_ddp_comp_dt_ids[] = { .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt8195-disp-mutex", .data = (void *)MTK_DISP_MUTEX }, + { .compatible = "mediatek,mt8365-disp-mutex", + .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt8173-disp-od", .data = (void *)MTK_DISP_OD }, { .compatible = "mediatek,mt2701-disp-ovl", -- 2.51.0 From dcb166ee43c3d594e7b73a24f6e8cf5663eeff2c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Jan 2025 12:35:57 +0300
Subject: [PATCH 03/16] drm/mediatek: dsi: fix error codes in mtk_dsi_host_transfer() There is a type bug because of the return statement: return ret < 0 ? ret : recv_cnt; The issue is that ret is an int, recv_cnt is a u32 and the function returns ssize_t, which is a signed long. The way that the type promotion works is that the negative error codes are first cast to u32 and then to signed long. The error codes end up being positive instead of negative and the callers treat them as success. Fixes: 81cc7e51c4f1 ("drm/mediatek: Allow commands to be sent during video mode") Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202412210801.iADw0oIH-lkp@intel.com/ Signed-off-by: Dan Carpenter Reviewed-by: Mattijs Korpershoek Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/b754a408-4f39-4e37-b52d-7706c132e27f@stanley.mountain/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dsi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index f298293471de..cea995663b76 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1116,12 +1116,12 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, const struct mipi_dsi_msg *msg) { struct mtk_dsi *dsi = host_to_dsi(host); - u32 recv_cnt, i; + ssize_t recv_cnt; u8 read_data[16]; void *src_addr; u8 irq_flag = CMD_DONE_INT_FLAG; u32 dsi_mode; - int ret; + int ret, i; dsi_mode = readl(dsi->regs + DSI_MODE_CTRL); if (dsi_mode & MODE) { @@ -1170,7 +1170,7 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, if (recv_cnt) memcpy(msg->rx_buf, src_addr, recv_cnt); - DRM_INFO("dsi get %d byte data from the panel address(0x%x)\n", + DRM_INFO("dsi get %zd byte data from the panel address(0x%x)\n", recv_cnt, *((u8 *)(msg->tx_buf))); restore_dsi_mode: -- 2.51.0 From 4b22f19cc7a9c671e0677db8bad2560d7ea2dba4 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 6 Mar 2025 17:06:43 -0500 Subject: [PATCH 04/16] drm/xe/guc_pc: Remove duplicated pc_start call xe_guc_pc_start() was getting called from both xe_uc_init_hw() and from xe_guc_start(). But both are called from do_gt_restart() and only xe_uc_init_hw() is called at initialization. So, let's remove the duplication in the regular gt_restart path. The only place where xe_guc_pc_start() won't get called now is on the gt_reset failure path. However, if gt_reset has failed, it is really unlikely that the PC start will work or is desired.
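A side note on the type promotion fixed in patch 03 above: the hazard is easy to reproduce in a few lines of standalone C. This is only an illustrative sketch with hypothetical function names, not the driver code itself:

#include <stdio.h>
#include <sys/types.h>	/* ssize_t */

typedef unsigned int u32;

/* Buggy pattern: in "ret < 0 ? ret : recv_cnt" the usual arithmetic
 * conversions turn the int into a u32, so a negative error code becomes
 * a huge positive value before it is widened to ssize_t. */
static ssize_t transfer_buggy(int ret, u32 recv_cnt)
{
	return ret < 0 ? ret : recv_cnt;
}

/* With recv_cnt declared as ssize_t, both arms of ?: are signed and the
 * error code survives the return. */
static ssize_t transfer_fixed(int ret, ssize_t recv_cnt)
{
	return ret < 0 ? ret : recv_cnt;
}

int main(void)
{
	printf("buggy: %zd\n", transfer_buggy(-5, 16));	/* 4294967291 */
	printf("fixed: %zd\n", transfer_fixed(-5, 16));	/* -5 */
	return 0;
}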
Cc: Vinay Belgaumkar Reviewed-by: Jonathan Cavitt Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20250306220643.1014049-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit fc858ddf9c68696537cec530d2d48bf6ed06ea92) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index bc1ff0a4e1e7..bc5714a5b36b 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1496,14 +1496,6 @@ void xe_guc_stop(struct xe_guc *guc) int xe_guc_start(struct xe_guc *guc) { - if (!IS_SRIOV_VF(guc_to_xe(guc))) { - int err; - - err = xe_guc_pc_start(&guc->pc); - xe_gt_WARN(guc_to_gt(guc), err, "Failed to start GuC PC: %pe\n", - ERR_PTR(err)); - } - return xe_guc_submit_start(guc); } -- 2.51.0 From fd6c10e67b2986b68f0294cae584f873f7a2478c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 7 Mar 2025 17:14:28 -0800 Subject: [PATCH 05/16] drm/gpusvm: Fix kernel-doc MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Due to wrong `.. kernel-doc` directive in Documentation/gpu/rfc/gpusvm.rst the documentation was actually not parsing anything from drivers/gpu/drm/drm_gpusvm.c. This fixes the kernel-doc include and all warnings/errors created when doing so. Cc: Simona Vetter Cc: Dave Airlie Cc: Christian König Cc: dri-devel@lists.freedesktop.org Cc: Matthew Brost Cc: Thomas Hellström Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/intel-xe/20250307195239.57abcd2d@canb.auug.org.au/ Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250307-fix-svm-kerneldoc-v2-1-03c74b199620@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 4da1fb61e02a783fdd7eb725ea03d897b8ef19ea) Signed-off-by: Lucas De Marchi --- Documentation/gpu/rfc/gpusvm.rst | 15 ++-- drivers/gpu/drm/drm_gpusvm.c | 124 +++++++++++++++++-------------- 2 files changed, 79 insertions(+), 60 deletions(-) diff --git a/Documentation/gpu/rfc/gpusvm.rst b/Documentation/gpu/rfc/gpusvm.rst index 073e46065d9c..bcf66a8137a6 100644 --- a/Documentation/gpu/rfc/gpusvm.rst +++ b/Documentation/gpu/rfc/gpusvm.rst @@ -67,14 +67,19 @@ Agreed upon design principles Overview of baseline design =========================== -Baseline design is simple as possible to get a working basline in which can be -built upon. - -.. kernel-doc:: drivers/gpu/drm/xe/drm_gpusvm.c +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Overview + +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Locking - :doc: Migrataion + +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c + :doc: Migration + +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Partial Unmapping of Ranges + +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Examples Possible future design features diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index f314f5c4af0f..2451c816edd5 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -23,37 +23,42 @@ * DOC: Overview * * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM) - * - * The GPU SVM layer is a component of the DRM framework designed to manage shared - * virtual memory between the CPU and GPU. 
It enables efficient data exchange and - * processing for GPU-accelerated applications by allowing memory sharing and + * is a component of the DRM framework designed to manage shared virtual memory + * between the CPU and GPU. It enables efficient data exchange and processing + * for GPU-accelerated applications by allowing memory sharing and * synchronization between the CPU's and GPU's virtual address spaces. * * Key GPU SVM Components: - * - Notifiers: Notifiers: Used for tracking memory intervals and notifying the - * GPU of changes, notifiers are sized based on a GPU SVM - * initialization parameter, with a recommendation of 512M or - * larger. They maintain a Red-BlacK tree and a list of ranges that - * fall within the notifier interval. Notifiers are tracked within - * a GPU SVM Red-BlacK tree and list and are dynamically inserted - * or removed as ranges within the interval are created or - * destroyed. - * - Ranges: Represent memory ranges mapped in a DRM device and managed - * by GPU SVM. They are sized based on an array of chunk sizes, which - * is a GPU SVM initialization parameter, and the CPU address space. - * Upon GPU fault, the largest aligned chunk that fits within the - * faulting CPU address space is chosen for the range size. Ranges are - * expected to be dynamically allocated on GPU fault and removed on an - * MMU notifier UNMAP event. As mentioned above, ranges are tracked in - * a notifier's Red-Black tree. - * - Operations: Define the interface for driver-specific GPU SVM operations - * such as range allocation, notifier allocation, and - * invalidations. - * - Device Memory Allocations: Embedded structure containing enough information - * for GPU SVM to migrate to / from device memory. - * - Device Memory Operations: Define the interface for driver-specific device - * memory operations release memory, populate pfns, - * and copy to / from device memory. + * + * - Notifiers: + * Used for tracking memory intervals and notifying the GPU of changes, + * notifiers are sized based on a GPU SVM initialization parameter, with a + * recommendation of 512M or larger. They maintain a Red-BlacK tree and a + * list of ranges that fall within the notifier interval. Notifiers are + * tracked within a GPU SVM Red-BlacK tree and list and are dynamically + * inserted or removed as ranges within the interval are created or + * destroyed. + * - Ranges: + * Represent memory ranges mapped in a DRM device and managed by GPU SVM. + * They are sized based on an array of chunk sizes, which is a GPU SVM + * initialization parameter, and the CPU address space. Upon GPU fault, + * the largest aligned chunk that fits within the faulting CPU address + * space is chosen for the range size. Ranges are expected to be + * dynamically allocated on GPU fault and removed on an MMU notifier UNMAP + * event. As mentioned above, ranges are tracked in a notifier's Red-Black + * tree. + * + * - Operations: + * Define the interface for driver-specific GPU SVM operations such as + * range allocation, notifier allocation, and invalidations. + * + * - Device Memory Allocations: + * Embedded structure containing enough information for GPU SVM to migrate + * to / from device memory. + * + * - Device Memory Operations: + * Define the interface for driver-specific device memory operations + * release memory, populate pfns, and copy to / from device memory. * * This layer provides interfaces for allocating, mapping, migrating, and * releasing memory ranges between the CPU and GPU. 
It handles all core memory @@ -63,14 +68,18 @@ * below. * * Expected Driver Components: - * - GPU page fault handler: Used to create ranges and notifiers based on the - * fault address, optionally migrate the range to - * device memory, and create GPU bindings. - * - Garbage collector: Used to unmap and destroy GPU bindings for ranges. - * Ranges are expected to be added to the garbage collector - * upon a MMU_NOTIFY_UNMAP event in notifier callback. - * - Notifier callback: Used to invalidate and DMA unmap GPU bindings for - * ranges. + * + * - GPU page fault handler: + * Used to create ranges and notifiers based on the fault address, + * optionally migrate the range to device memory, and create GPU bindings. + * + * - Garbage collector: + * Used to unmap and destroy GPU bindings for ranges. Ranges are expected + * to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in + * notifier callback. + * + * - Notifier callback: + * Used to invalidate and DMA unmap GPU bindings for ranges. */ /** @@ -83,9 +92,9 @@ * range RB tree and list, as well as the range's DMA mappings and sequence * number. GPU SVM manages all necessary locking and unlocking operations, * except for the recheck range's pages being valid - * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings. This - * lock corresponds to the 'driver->update' lock mentioned in the HMM - * documentation (TODO: Link). Future revisions may transition from a GPU SVM + * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings. + * This lock corresponds to the ``driver->update`` lock mentioned in + * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM * global lock to a per-notifier lock if finer-grained locking is deemed * necessary. * @@ -102,11 +111,11 @@ * DOC: Migration * * The migration support is quite simple, allowing migration between RAM and - * device memory at the range granularity. For example, GPU SVM currently does not - * support mixing RAM and device memory pages within a range. This means that upon GPU - * fault, the entire range can be migrated to device memory, and upon CPU fault, the - * entire range is migrated to RAM. Mixed RAM and device memory storage within a range - * could be added in the future if required. + * device memory at the range granularity. For example, GPU SVM currently does + * not support mixing RAM and device memory pages within a range. This means + * that upon GPU fault, the entire range can be migrated to device memory, and + * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device + * memory storage within a range could be added in the future if required. * * The reasoning for only supporting range granularity is as follows: it * simplifies the implementation, and range sizes are driver-defined and should @@ -119,11 +128,11 @@ * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one * being that a subset of the range still has CPU and GPU mappings. If the - * backing store for the range is in device memory, a subset of the backing store has - * references. One option would be to split the range and device memory backing store, - * but the implementation for this would be quite complicated. Given that - * partial unmappings are rare and driver-defined range sizes are relatively - * small, GPU SVM does not support splitting of ranges. 
+ * backing store for the range is in device memory, a subset of the backing + * store has references. One option would be to split the range and device + * memory backing store, but the implementation for this would be quite + * complicated. Given that partial unmappings are rare and driver-defined range + * sizes are relatively small, GPU SVM does not support splitting of ranges. * * With no support for range splitting, upon partial unmapping of a range, the * driver is expected to invalidate and destroy the entire range. If the range * @@ -144,6 +153,8 @@ * * 1) GPU page fault handler * + * .. code-block:: c + * * int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range) * { * int err = 0; @@ -208,7 +219,9 @@ * return err; * } * - * 2) Garbage Collector. + * 2) Garbage Collector + * + * .. code-block:: c * * void __driver_garbage_collector(struct drm_gpusvm *gpusvm, * struct drm_gpusvm_range *range) @@ -231,7 +244,9 @@ * __driver_garbage_collector(gpusvm, range); * } * - * 3) Notifier callback. + * 3) Notifier callback + * + * .. code-block:: c * * void driver_invalidation(struct drm_gpusvm *gpusvm, * struct drm_gpusvm_notifier *notifier, @@ -499,7 +514,7 @@ drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni, return true; } -/** +/* * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM */ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { @@ -2055,7 +2070,6 @@ err_out: /** * drm_gpusvm_range_evict - Evict GPU SVM range - * @pagemap: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range to be removed * * This function evicts the specified GPU SVM range. This function will not @@ -2146,8 +2160,8 @@ static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf) return err ? VM_FAULT_SIGBUS : 0; } -/** - * drm_gpusvm_pagemap_ops() - Device page map operations for GPU SVM +/* + * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM */ static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = { .page_free = drm_gpusvm_page_free, -- 2.51.0 From cedf23842d7433eb32cb782a637bb870fb096a3b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 6 Mar 2025 20:00:05 -0800 Subject: [PATCH 06/16] drm/xe/rtp: Drop sentinels from arg to xe_rtp_process_to_sr() There's a mismatch on API: while xe_rtp_process_to_sr() processes entries until an entry without name, the active tracking with xe_rtp_process_ctx_enable_active_tracking() needs to use the number of elements. The number of elements is taken everywhere using ARRAY_SIZE(), but that will have one entry too many. This leads to the following warning, as reported by lkp: drivers/gpu/drm/xe/xe_tuning.c: In function 'xe_tuning_dump': >> include/drm/drm_print.h:228:31: warning: '%s' directive argument is null [-Wformat-overflow=] 228 | drm_printf((printer), "%.*s" fmt, (indent), "\t\t\t\t\tX", ##__VA_ARGS__) | ^~~~~~ drivers/gpu/drm/xe/xe_tuning.c:226:17: note: in expansion of macro 'drm_printf_indent' 226 | drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); | ^~~~~~~~~~~~~~~~~ That's because it will still process the last entry when tracking the active tunings. The same issue exists in the WAs. Change xe_rtp_process_to_sr() to also take the number of elements so the empty entry can be removed and the warning should go away. Fixing on the active-tracking side would be more fragile as it would need a `- 1` everywhere and continue to use a different approach for number of elements.
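The off-by-one is easy to see in isolation. A stand-alone sketch with made-up table contents, not the xe code:

#include <stdio.h>

struct entry { const char *name; };

static const struct entry table[] = {
	{ .name = "entry_one" },
	{ .name = "entry_two" },
	{}	/* sentinel */
};

int main(void)
{
	int walked = 0;

	/* A name-terminated walk, the old xe_rtp_process_to_sr() contract,
	 * sees two entries... */
	for (const struct entry *e = table; e->name; e++)
		walked++;

	/* ...but ARRAY_SIZE-style counting, as used for active tracking,
	 * sees three: the sentinel is included, and printing its NULL name
	 * with "%s" is what the -Wformat-overflow warning complains about. */
	printf("walked=%d ARRAY_SIZE=%zu\n",
	       walked, sizeof(table) / sizeof(table[0]));
	return 0;
}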
Aside from the warning, it's a non-issue as there would always be enough bits allocated and the last entry would never be active since xe_rtp_process_to_sr() stops on the sentinel. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503021906.P2MwAvyK-lkp@intel.com/ Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20250306-fix-print-warning-v1-1-979c3dc03c0d@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8aa8c2d4214e1771c32101d70740002662d31bb7) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 2 +- drivers/gpu/drm/xe/xe_hw_engine.c | 6 ++---- drivers/gpu/drm/xe/xe_reg_whitelist.c | 4 ++-- drivers/gpu/drm/xe/xe_rtp.c | 6 +++++- drivers/gpu/drm/xe/xe_rtp.h | 2 +- drivers/gpu/drm/xe/xe_tuning.c | 12 ++++-------- drivers/gpu/drm/xe/xe_wa.c | 12 +++--------- 7 files changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 36a3b5420fef..b0254b014fe4 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -320,7 +320,7 @@ static void xe_rtp_process_to_sr_tests(struct kunit *test) count_rtp_entries++; xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); - xe_rtp_process_to_sr(&ctx, param->entries, reg_sr); + xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, reg_sr); xa_for_each(®_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index fc447751fe78..223b95de388c 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -400,10 +400,9 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) }, - {} }; - xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc); } static void @@ -459,10 +458,9 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - {} }; - xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); } static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance) diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index edab5d4e3ba5..23f6c81d9994 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -88,7 +88,6 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4)) }, - {} }; static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) @@ -137,7 +136,8 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist); + xe_rtp_process_to_sr(&ctx, register_whitelist, ARRAY_SIZE(register_whitelist), + &hwe->reg_whitelist); whitelist_apply_to_hwe(hwe); } diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 7a1c78fdfc92..13bb62d3e615 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -237,6 +237,7 @@ static void rtp_mark_active(struct xe_device *xe, * the save-restore argument. 
* @ctx: The context for processing the table, with one of device, gt or hwe * @entries: Table with RTP definitions + * @n_entries: Number of entries to process, usually ARRAY_SIZE(entries) * @sr: Save-restore struct where matching rules execute the action. This can be * viewed as the "coalesced view" of multiple the tables. The bits for each * register set are expected not to collide with previously added entries @@ -247,6 +248,7 @@ static void rtp_mark_active(struct xe_device *xe, */ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, + size_t n_entries, struct xe_reg_sr *sr) { const struct xe_rtp_entry_sr *entry; @@ -259,7 +261,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, if (IS_SRIOV_VF(xe)) return; - for (entry = entries; entry && entry->name; entry++) { + xe_assert(xe, entries); + + for (entry = entries; entry - entries < n_entries; entry++) { bool match = false; if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 38b9f13bba5e..4fe736a11c42 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -430,7 +430,7 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, - struct xe_reg_sr *sr); + size_t n_entries, struct xe_reg_sr *sr); void xe_rtp_process(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry *entries); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 77bc958f5a42..49ddbda7cdef 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -85,8 +85,6 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_RULES(MEDIA_VERSION(2000)), XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) }, - - {} }; static const struct xe_rtp_entry_sr engine_tunings[] = { @@ -100,7 +98,6 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, - {} }; static const struct xe_rtp_entry_sr lrc_tunings[] = { @@ -138,8 +135,6 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) }, - - {} }; /** @@ -180,7 +175,7 @@ void xe_tuning_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)); - xe_rtp_process_to_sr(&ctx, gt_tunings, >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -191,7 +186,8 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)); - xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), + &hwe->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); @@ -210,7 +206,7 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)); - xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); } void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c 
index 55eb453f4b1f..a25afb757f70 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -279,8 +279,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - - {} }; static const struct xe_rtp_entry_sr engine_was[] = { @@ -624,8 +622,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, - - {} }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -825,8 +821,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, - - {} }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { @@ -868,7 +862,7 @@ void xe_wa_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt, ARRAY_SIZE(gt_was)); - xe_rtp_process_to_sr(&ctx, gt_was, >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -886,7 +880,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine, ARRAY_SIZE(engine_was)); - xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), &hwe->reg_sr); } /** @@ -903,7 +897,7 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc, ARRAY_SIZE(lrc_was)); - xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), &hwe->reg_lrc); } /** -- 2.51.0 From 298661cd9cea55233cf60dee3ef9f736ddd1db7a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 7 Mar 2025 11:13:59 +0000 Subject: [PATCH 07/16] drm/xe: Fix MOCS debugfs LNCF readout With only XE_FW_GT taken LNCF registers read back as all zeroes, leading to a wild goose chase trying to figure out why is register programming incorrect. Fix it by grabbing XE_FORCEWAKE_ALL for affected platforms. Signed-off-by: Tvrtko Ursulin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20250307111402.26577-2-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 1182bc74b39ba3d124b544dab22d5672fae54b67) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mocs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 54d199b5cfb2..31dade91a089 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -781,7 +781,9 @@ void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) flags = get_mocs_settings(xe, &table); xe_pm_runtime_get_noresume(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + fw_ref = xe_force_wake_get(gt_to_fw(gt), + flags & HAS_LNCF_MOCS ? + XE_FORCEWAKE_ALL : XE_FW_GT); if (!fw_ref) goto err_fw; -- 2.51.0 From e2a0a6328ea7385db00c3d4f3067ded9bbb709a1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 7 Mar 2025 11:14:00 +0000 Subject: [PATCH 08/16] drm/xe: Fix ring flush invalidation Emit_flush_invalidate() is incorrectly marking the write to LRC_PPHWSP as a GGTT write and also writing an atypical ~0 dword as the payload. Fix it. While at it drop the unused flags argument. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20250307111402.26577-3-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 08ea901d0b8f6ea261d9936e03fa690540af0126) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_ring_ops.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index d2f604aa96fa..3d1b4d3d788f 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -111,16 +111,13 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 flag, u32 *dw, int i) +static int emit_flush_invalidate(u32 *dw, int i) { - dw[i] = MI_FLUSH_DW; - dw[i] |= flag; - dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | - MI_FLUSH_DW_STORE_INDEX; - - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; + dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; + dw[i++] = 0; dw[i++] = 0; - dw[i++] = ~0U; return i; } @@ -413,7 +410,7 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { /* XXX: Do we need this? Leaving for now. */ dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(0, dw, i); + i = emit_flush_invalidate(dw, i); dw[i++] = preparser_disable(false); } -- 2.51.0 From 11ef40eb57322322139de460f6370aec38da5a45 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 7 Mar 2025 11:14:01 +0000 Subject: [PATCH 09/16] drm/xe: Pass flags directly to emit_flush_imm_ggtt This is more readable than the nameless booleans and will also come handy later. Signed-off-by: Tvrtko Ursulin Reviewed-by: Matt Roper Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20250307111402.26577-4-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 52a237e8d6c4abcda40c71268ee6cec75aa62799) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_ring_ops.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 3d1b4d3d788f..917fc16de866 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -90,11 +90,10 @@ static int emit_flush_dw(u32 *dw, int i) return i; } -static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, - u32 *dw, int i) +static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 flags, u32 *dw, int i) { dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | - (invalidate_tlb ? 
MI_INVALIDATE_TLB : 0); + flags; dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; dw[i++] = value; @@ -254,7 +253,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, true, dw, i); + seqno, MI_INVALIDATE_TLB, dw, i); dw[i++] = preparser_disable(false); } else { i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), @@ -270,7 +269,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc dw, i); } - i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, 0, dw, i); i = emit_user_interrupt(dw, i); @@ -316,7 +315,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, if (job->ring_ops_flush_tlb) i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, true, dw, i); + seqno, MI_INVALIDATE_TLB, dw, i); dw[i++] = preparser_disable(false); @@ -333,7 +332,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, dw, i); } - i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, 0, dw, i); i = emit_user_interrupt(dw, i); -- 2.51.0 From 7b7b07c285c304317d00ea21c2a659167d4d4d12 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 7 Mar 2025 11:14:02 +0000 Subject: [PATCH 10/16] drm/xe: Use correct type width for alignment in fb pinning code Plane->min_alignment returns an unsigned int so lets use that in the whole relevant call chain. Signed-off-by: Tvrtko Ursulin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20250307111402.26577-5-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi (cherry picked from commit c36e3442ea1c4c63f9876486dd9091487a77c5f2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index b69896baa20c..d918ae1c8061 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -82,7 +82,7 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, struct i915_vma *vma, - u64 physical_alignment) + unsigned int alignment) { struct xe_device *xe = to_xe_device(fb->base.dev); struct xe_tile *tile0 = xe_device_get_root_tile(xe); @@ -108,7 +108,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_BO_FLAG_VRAM0 | XE_BO_FLAG_GGTT | XE_BO_FLAG_PAGETABLE, - physical_alignment); + alignment); else dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, dpt_size, ~0ull, @@ -116,7 +116,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_BO_FLAG_STOLEN | XE_BO_FLAG_GGTT | XE_BO_FLAG_PAGETABLE, - physical_alignment); + alignment); if (IS_ERR(dpt)) dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, dpt_size, ~0ull, @@ -124,7 +124,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | XE_BO_FLAG_PAGETABLE, - physical_alignment); + alignment); if (IS_ERR(dpt)) return PTR_ERR(dpt); @@ -194,7 +194,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo static int 
__xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, struct i915_vma *vma, - u64 physical_alignment) + unsigned int alignment) { struct drm_gem_object *obj = intel_fb_bo(&fb->base); struct xe_bo *bo = gem_to_xe_bo(obj); @@ -277,7 +277,7 @@ out: static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, - u64 physical_alignment) + unsigned int alignment) { struct drm_device *dev = fb->base.dev; struct xe_device *xe = to_xe_device(dev); @@ -327,9 +327,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, vma->bo = bo; if (intel_fb_uses_dpt(&fb->base)) - ret = __xe_pin_fb_vma_dpt(fb, view, vma, physical_alignment); + ret = __xe_pin_fb_vma_dpt(fb, view, vma, alignment); else - ret = __xe_pin_fb_vma_ggtt(fb, view, vma, physical_alignment); + ret = __xe_pin_fb_vma_ggtt(fb, view, vma, alignment); if (ret) goto err_unpin; @@ -422,7 +422,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, struct i915_vma *vma; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane); - u64 phys_alignment = plane->min_alignment(plane, fb, 0); + unsigned int alignment = plane->min_alignment(plane, fb, 0); if (reuse_vma(new_plane_state, old_plane_state)) return 0; @@ -430,7 +430,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, /* We reject creating !SCANOUT fb's, so this is weird.. */ drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT)); - vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, phys_alignment); + vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, alignment); if (IS_ERR(vma)) return PTR_ERR(vma); -- 2.51.0 From 80bcbdfc8cf51b41fde0a2466a83e057c5a1bf3a Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 12 Mar 2025 10:27:49 +0100 Subject: [PATCH 11/16] drm/xe/svm: Add stats for SVM page faults Add a new entry in stats to for svm page faults. If CONFIG_DEBUG_FS is enabled, the count can be viewed with per GT stat debugfs file. This is similar to what is already in place for vma page faults. 
Example output: cat /sys/kernel/debug/dri/0/gt0/stats svm_pagefault_count: 6 tlb_inval_count: 78 vma_pagefault_count: 0 vma_pagefault_kb: 0 v2: Fix build with CONFIG_DRM_GPUSVM disabled v3: Update argument in kernel doc Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20250312092749.164232-1-francois.dugast@intel.com Signed-off-by: Francois Dugast --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +- drivers/gpu/drm/xe/xe_gt_stats.c | 1 + drivers/gpu/drm/xe/xe_gt_stats_types.h | 1 + drivers/gpu/drm/xe/xe_svm.c | 8 ++++++-- drivers/gpu/drm/xe/xe_svm.h | 7 ++++--- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index c5ad9a0a89c2..9fa11e837dd1 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -240,7 +240,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) atomic = access_is_atomic(pf->access_type); if (xe_vma_is_cpu_addr_mirror(vma)) - err = xe_svm_handle_pagefault(vm, vma, gt_to_tile(gt), + err = xe_svm_handle_pagefault(vm, vma, gt, pf->page_addr, atomic); else err = handle_vma_pagefault(gt, vma, atomic); diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 6155ea354432..30f942671c2b 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -27,6 +27,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) } static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { + "svm_pagefault_count", "tlb_inval_count", "vma_pagefault_count", "vma_pagefault_kb", diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index d556771f99d6..be3244d7133c 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -7,6 +7,7 @@ #define _XE_GT_STATS_TYPES_H_ enum xe_gt_stats_id { + XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 516898e99b26..08617a62ab07 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -4,6 +4,7 @@ */ #include "xe_bo.h" +#include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" @@ -713,7 +714,7 @@ unlock: * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. * @vma: The CPU address mirror VMA. - * @tile: The tile upon the fault occurred. + * @gt: The gt upon the fault occurred. * @fault_addr: The GPU fault address. * @atomic: The fault atomic access bit. * @@ -723,7 +724,7 @@ unlock: * Return: 0 on success, negative error code on error. 
*/ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic) { struct drm_gpusvm_ctx ctx = { @@ -737,12 +738,15 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, struct drm_gpusvm_range *r; struct drm_exec exec; struct dma_fence *fence; + struct xe_tile *tile = gt_to_tile(gt); ktime_t end = 0; int err; lockdep_assert_held_write(&vm->lock); xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1); + retry: /* Always process UNMAPs first so view SVM ranges is current */ err = xe_svm_garbage_collector(vm); diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index e059590e5076..93442738666e 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -12,10 +12,11 @@ #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER struct xe_bo; -struct xe_vram_region; +struct xe_gt; struct xe_tile; struct xe_vm; struct xe_vma; +struct xe_vram_region; /** struct xe_svm_range - SVM range */ struct xe_svm_range { @@ -64,7 +65,7 @@ void xe_svm_fini(struct xe_vm *vm); void xe_svm_close(struct xe_vm *vm); int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic); bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); @@ -102,7 +103,7 @@ void xe_svm_close(struct xe_vm *vm) static inline int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic) { return 0; -- 2.51.0 From 278469ff569e1082d56b4a7af26fbaecef9fbf3b Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 12 Mar 2025 10:31:20 -0700 Subject: [PATCH 12/16] drm/xe/eustall: Fix a possible pointer dereference after free If devm_add_action_or_reset() isn't successful, xe_eu_stall_fini() is invoked. So, unsuccessful return from devm_add_action_or_reset() shouldn't dereference gt->eu_stall as xe_eu_stall_fini() already frees it. Fix this issue. Fixes: 9a0b11d4cf3b ("drm/xe/eustall: Add support to init, enable and disable EU stall sampling") Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/eae49a414a7314921108e0388810aaee6261ad92.1741800396.git.harish.chegondi@intel.com --- drivers/gpu/drm/xe/xe_eu_stall.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 88a92baf5c95..f2bb9168967c 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -222,13 +222,7 @@ int xe_eu_stall_init(struct xe_gt *gt) goto exit_free; } - ret = devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt); - if (ret) - goto exit_destroy; - - return 0; -exit_destroy: - destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq); + return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt); exit_free: mutex_destroy(>->eu_stall->stream_lock); kfree(gt->eu_stall); -- 2.51.0 From 28f79ac609de2797cccdd5fa6c4d5ec8bcef92b4 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Wed, 12 Mar 2025 14:29:09 +0530 Subject: [PATCH 13/16] drm/xe/hwmon: expose fan speed Add hwmon support for fan1_input, fan2_input and fan3_input attributes, which will expose fan speed of respective channels in RPM when supported by hardware. With this in place we can monitor fan speed using lm-sensors tool. 
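The arithmetic used by the new fan read handler below can be restated compactly; the sample numbers here are made up for illustration:

#include <stdio.h>

typedef unsigned int u32;
typedef unsigned long long u64;

/* The hardware counter accumulates PWM pulses at 2 pulses per fan
 * rotation, so speed is rotations * msecs-per-minute / elapsed msecs,
 * rounded up, mirroring xe_hwmon_fan_input_read() in the diff. */
static long fan_rpm(u32 count_now, u32 count_prev, u64 msecs)
{
	u64 rotations = (count_now - count_prev) / 2;
	u64 num = rotations * 60 * 1000;

	return (long)((num + msecs - 1) / msecs);	/* DIV_ROUND_UP */
}

int main(void)
{
	/* 3000 pulses in 30 s: 1500 rotations in half a minute, 3000 RPM. */
	printf("%ld RPM\n", fan_rpm(3000, 0, 30 * 1000));
	return 0;
}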
v2: Rely on platform checks instead of mailbox error (Aravind, Rodrigo) v3: Introduce has_fan_control flag (Rodrigo) Signed-off-by: Raag Jadav Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20250312085909.755073-1-raag.jadav@intel.com Signed-off-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 24 ++++ drivers/gpu/drm/xe/regs/xe_pcode_regs.h | 3 + drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_hwmon.c | 125 +++++++++++++++++- drivers/gpu/drm/xe/xe_pci.c | 4 + drivers/gpu/drm/xe/xe_pcode_api.h | 3 + 6 files changed, 160 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 9bce281314df..adbb9bce15a5 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -124,3 +124,27 @@ Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan1_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 1 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan2_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 2 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan3_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 3 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. 
diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index 8846eb9ce2a4..c7d5d782e3f9 100644 --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -21,6 +21,9 @@ #define BMG_PACKAGE_POWER_SKU XE_REG(0x138098) #define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc) #define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) +#define BMG_FAN_1_SPEED XE_REG(0x138140) +#define BMG_FAN_2_SPEED XE_REG(0x138170) +#define BMG_FAN_3_SPEED XE_REG(0x1381a0) #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) #define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index fac488942316..a2c0e791b199 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -314,6 +314,8 @@ struct xe_device { u8 has_atomic_enable_pte_bit:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; + /** @info.has_fan_control: Device supports fan control */ + u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; /** @info.has_heci_cscfi: device has heci cscfi */ diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 48d80ffdf7bb..eb293aec36a0 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -27,6 +28,7 @@ enum xe_hwmon_reg { REG_PKG_POWER_SKU_UNIT, REG_GT_PERF_STATUS, REG_PKG_ENERGY_STATUS, + REG_FAN_SPEED, }; enum xe_hwmon_reg_operation { @@ -42,6 +44,13 @@ enum xe_hwmon_channel { CHANNEL_MAX, }; +enum xe_fan_channel { + FAN_1, + FAN_2, + FAN_3, + FAN_MAX, +}; + /* * SF_* - scale factors for particular quantities according to hwmon spec. 
*/ @@ -61,6 +70,16 @@ struct xe_hwmon_energy_info { long accum_energy; }; +/** + * struct xe_hwmon_fan_info - to cache previous fan reading + */ +struct xe_hwmon_fan_info { + /** @reg_val_prev: previous fan reg val */ + u32 reg_val_prev; + /** @time_prev: previous timestamp */ + u64 time_prev; +}; + /** * struct xe_hwmon - xe hwmon data structure */ @@ -79,6 +98,8 @@ struct xe_hwmon { int scl_shift_time; /** @ei: Energy info for energyN_input */ struct xe_hwmon_energy_info ei[CHANNEL_MAX]; + /** @fi: Fan info for fanN_input */ + struct xe_hwmon_fan_info fi[FAN_MAX]; }; static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, @@ -144,6 +165,14 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return PCU_CR_PACKAGE_ENERGY_STATUS; } break; + case REG_FAN_SPEED: + if (channel == FAN_1) + return BMG_FAN_1_SPEED; + else if (channel == FAN_2) + return BMG_FAN_2_SPEED; + else if (channel == FAN_3) + return BMG_FAN_3_SPEED; + break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; @@ -454,6 +483,7 @@ static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), + HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT), NULL }; @@ -480,6 +510,19 @@ static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) (uval & POWER_SETUP_I1_DATA_MASK)); } +static int xe_hwmon_pcode_read_fan_control(const struct xe_hwmon *hwmon, u32 subcmd, u32 *uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + + /* Platforms that don't return correct value */ + if (hwmon->xe->info.platform == XE_DG2 && subcmd == FSC_READ_NUM_FANS) { + *uval = 2; + return 0; + } + + return xe_pcode_read(root_tile, PCODE_MBOX(FAN_SPEED_CONTROL, subcmd, 0), uval, NULL); +} + static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, long *value, u32 scale_factor) { @@ -705,6 +748,75 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) } } +static umode_t +xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) +{ + u32 uval; + + if (!hwmon->xe->info.has_fan_control) + return 0; + + switch (attr) { + case hwmon_fan_input: + if (xe_hwmon_pcode_read_fan_control(hwmon, FSC_READ_NUM_FANS, &uval)) + return 0; + + return channel < uval ? 0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_fan_input_read(struct xe_hwmon *hwmon, int channel, long *val) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); + struct xe_hwmon_fan_info *fi = &hwmon->fi[channel]; + u64 rotations, time_now, time; + u32 reg_val; + int ret = 0; + + mutex_lock(&hwmon->hwmon_lock); + + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_FAN_SPEED, channel)); + time_now = get_jiffies_64(); + + /* + * HW register value is accumulated count of pulses from PWM fan with the scale + * of 2 pulses per rotation. + */ + rotations = (reg_val - fi->reg_val_prev) / 2; + + time = jiffies_delta_to_msecs(time_now - fi->time_prev); + if (unlikely(!time)) { + ret = -EAGAIN; + goto unlock; + } + + /* + * Calculate fan speed in RPM by time averaging two subsequent readings in minutes. 
+ * RPM = number of rotations * msecs per minute / time in msecs + */ + *val = DIV_ROUND_UP_ULL(rotations * (MSEC_PER_SEC * 60), time); + + fi->reg_val_prev = reg_val; + fi->time_prev = time_now; +unlock: + mutex_unlock(&hwmon->hwmon_lock); + return ret; +} + +static int +xe_hwmon_fan_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) +{ + switch (attr) { + case hwmon_fan_input: + return xe_hwmon_fan_input_read(hwmon, channel, val); + default: + return -EOPNOTSUPP; + } +} + static umode_t xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) @@ -730,6 +842,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, case hwmon_energy: ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); break; + case hwmon_fan: + ret = xe_hwmon_fan_is_visible(hwmon, attr, channel); + break; default: ret = 0; break; @@ -765,6 +880,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_energy: ret = xe_hwmon_energy_read(hwmon, attr, channel, val); break; + case hwmon_fan: + ret = xe_hwmon_fan_read(hwmon, attr, channel, val); + break; default: ret = -EOPNOTSUPP; break; @@ -842,7 +960,7 @@ static void xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - long energy; + long energy, fan_speed; u64 val_sku_unit = 0; int channel; struct xe_reg pkg_power_sku_unit; @@ -866,6 +984,11 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) for (channel = 0; channel < CHANNEL_MAX; channel++) if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) xe_hwmon_energy_get(hwmon, channel, &energy); + + /* Initialize 'struct xe_hwmon_fan_info' with initial fan register reading. */ + for (channel = 0; channel < FAN_MAX; channel++) + if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel)) + xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); } static void xe_hwmon_mutex_destroy(void *arg) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index da9679c8cf26..fc89d744978a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -62,6 +62,7 @@ struct xe_device_desc { u8 is_dgfx:1; u8 has_display:1; + u8 has_fan_control:1; u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; u8 has_llc:1; @@ -302,6 +303,7 @@ static const struct xe_device_desc dg2_desc = { DG2_FEATURES, .has_display = true, + .has_fan_control = true, }; static const __maybe_unused struct xe_device_desc pvc_desc = { @@ -336,6 +338,7 @@ static const struct xe_device_desc bmg_desc = { PLATFORM(BATTLEMAGE), .dma_mask_size = 46, .has_display = true, + .has_fan_control = true, .has_heci_cscfi = 1, }; @@ -575,6 +578,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.dma_mask_size = desc->dma_mask_size; xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_fan_control = desc->has_fan_control; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 2bae9afdbd35..e622ae17f08d 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -49,6 +49,9 @@ /* Domain IDs (param2) */ #define PCODE_MBOX_DOMAIN_HBM 0x2 +#define FAN_SPEED_CONTROL 0x7D +#define FSC_READ_NUM_FANS 0x4 + #define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4)) /* PCODE_SCRATCH0 */ #define AUXINFO_REG_OFFSET REG_GENMASK(17, 15) -- 2.51.0 From b96dabdba9b95f71ded50a1c094ee244408b2a8e 
Mon Sep 17 00:00:00 2001 From: Tomasz Rusinowicz Date: Tue, 18 Feb 2025 11:03:53 +0100 Subject: [PATCH 14/16] drm/xe: Fix exporting xe buffers multiple times MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The `struct ttm_resource->placement` contains TTM_PL_FLAG_* flags, but it was incorrectly tested for XE_PL_* flags. This caused xe_dma_buf_pin() to always fail when invoked for the second time. Fix this by checking the `mem_type` field instead. Fixes: 7764222d54b7 ("drm/xe: Disallow pinning dma-bufs in VRAM") Cc: Thomas Hellström Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Michal Wajdeczko Cc: Matthew Brost Cc: Matthew Auld Cc: Nirmoy Das Cc: Jani Nikula Cc: intel-xe@lists.freedesktop.org Cc: # v6.8+ Signed-off-by: Tomasz Rusinowicz Signed-off-by: Jacek Lawrynowicz Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250218100353.2137964-1-jacek.lawrynowicz@linux.intel.com Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.h | 2 -- drivers/gpu/drm/xe/xe_dma_buf.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index bda3fdd408da..ec3e4446d027 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -405,7 +405,6 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, const struct xe_bo_shrink_flags flags, unsigned long *scanned); -#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) /** * xe_bo_is_mem_type - Whether the bo currently resides in the given * TTM memory type @@ -420,4 +419,3 @@ static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type) return bo->ttm.resource->mem_type == mem_type; } #endif -#endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index c5b95470fa32..f67803e15a0e 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -58,7 +58,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) * 1) Avoid pinning in a placement not accessible to some importers. * 2) Pinning in VRAM requires PIN accounting which is a to-do. */ - if (xe_bo_is_pinned(bo) && bo->ttm.resource->placement != XE_PL_TT) { + if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT)) { drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n"); return -EINVAL; } -- 2.51.0 From 7c53ff050ba88bb37eed3e17f2bb8ec592d83302 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 20 Mar 2025 10:51:23 -0700 Subject: [PATCH 15/16] drm/xe: Apply Wa_16023105232 The WA requires KMD to disable DOP clock gating during a semaphore wait and also ensure that idle delay for every CS is lower than the idle wait time in the PWRCTX_MAXCNT register. Default values for these registers already comply with this restriction. 
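For a sense of scale in the IDLEDLY/MAXCNT comparison implemented below: IDLEDLY is counted in units of 8 * timestamp_base picoseconds and PWRCTX_MAXCNT in 640 ns units (unit values as in the diff). A stand-alone sketch with illustrative numbers; the register semantics are assumed from the code, not from hardware documentation:

#include <stdio.h>

typedef unsigned int u32;
typedef unsigned long long u64;

/* Restates the check in adjust_idledly(): the idle delay, converted to
 * nanoseconds, must stay below the idle wait time. */
static int idledly_violates_maxcnt(u32 idledly, u32 maxcnt, u32 timestamp_base)
{
	u32 idledly_units_ps = 8 * timestamp_base;	/* 8 * 52083 ps at 19.2 MHz */
	u64 idledly_ns = ((u64)idledly * idledly_units_ps + 500) / 1000;
	u64 maxcnt_ns = (u64)maxcnt * 640;

	return idledly_ns >= maxcnt_ns;
}

int main(void)
{
	/* 2000 IDLEDLY units (~833 us) against 1000 MAXCNT units (640 us)
	 * violates the restriction, so the WA would rewrite IDLEDLY. */
	printf("violates=%d\n", idledly_violates_maxcnt(2000, 1000, 52083));
	return 0;
}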
v2: Store timestamp_base in gt info and other comments (Daniele)
v3: Skip WA check for VF
v4: Review comments (Matt Roper)
v5: Cleanup the clock functions and use reg_field_get (Matt Roper)
v6: Fix checkpatch issue
v7: Fix CI issue

Cc: Matt Roper
Reviewed-by: Matt Roper
Reviewed-by: Daniele Ceraolo Spurio
Signed-off-by: Vinay Belgaumkar
Signed-off-by: John Harrison
Link: https://patchwork.freedesktop.org/patch/msgid/20250320175123.3026754-1-vinay.belgaumkar@intel.com
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h |  4 +++
 drivers/gpu/drm/xe/xe_gt_clock.c         | 39 ++++++++++++++++--------
 drivers/gpu/drm/xe/xe_gt_types.h         |  2 ++
 drivers/gpu/drm/xe/xe_hw_engine.c        | 33 ++++++++++++++++++++
 drivers/gpu/drm/xe/xe_wa.c               |  6 ++++
 drivers/gpu/drm/xe/xe_wa_oob.rules       |  2 ++
 6 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 659cf85fa3d6..da713634d6a0 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -130,6 +130,10 @@
 #define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
 #define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
 
+#define RING_IDLEDLY(base)			XE_REG((base) + 0x23c)
+#define   INHIBIT_SWITCH_UNTIL_PREEMPTED	REG_BIT(31)
+#define   IDLE_DELAY				REG_GENMASK(20, 0)
+
 #define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
 #define	  CTX_CTRL_PXP_ENABLE			REG_BIT(10)
 #define	  CTX_CTRL_OAC_CONTEXT_ENABLE		REG_BIT(8)
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index fca38738e610..4f011d1573c6 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -16,27 +16,42 @@
 #include "xe_macros.h"
 #include "xe_mmio.h"
 
-static u32 get_crystal_clock_freq(u32 rpm_config_reg)
+#define f19_2_mhz	19200000
+#define f24_mhz		24000000
+#define f25_mhz		25000000
+#define f38_4_mhz	38400000
+#define ts_base_83	83333
+#define ts_base_52	52083
+#define ts_base_80	80000
+
+static void read_crystal_clock(struct xe_gt *gt, u32 rpm_config_reg, u32 *freq,
+			       u32 *timestamp_base)
 {
-	const u32 f19_2_mhz = 19200000;
-	const u32 f24_mhz = 24000000;
-	const u32 f25_mhz = 25000000;
-	const u32 f38_4_mhz = 38400000;
 	u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, rpm_config_reg);
 
 	switch (crystal_clock) {
 	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
-		return f24_mhz;
+		*freq = f24_mhz;
+		*timestamp_base = ts_base_83;
+		return;
 	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
-		return f19_2_mhz;
+		*freq = f19_2_mhz;
+		*timestamp_base = ts_base_52;
+		return;
 	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
-		return f38_4_mhz;
+		*freq = f38_4_mhz;
+		*timestamp_base = ts_base_52;
+		return;
 	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
-		return f25_mhz;
+		*freq = f25_mhz;
+		*timestamp_base = ts_base_80;
+		return;
 	default:
-		XE_WARN_ON("NOT_POSSIBLE");
-		return 0;
+		xe_gt_warn(gt, "Invalid crystal clock frequency: %u", crystal_clock);
+		*freq = 0;
+		*timestamp_base = 0;
+		return;
 	}
 }
 
@@ -65,7 +80,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
 	check_ctc_mode(gt);
 
 	c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
-	freq = get_crystal_clock_freq(c0);
+	read_crystal_clock(gt, c0, &freq, &gt->info.timestamp_base);
 
 	/*
 	 * Now figure out how the command stream's timestamp
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index e3cfb026ac88..7def0959da35 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -121,6 +121,8 @@ struct xe_gt {
 		enum xe_gt_type type;
 		/** @info.reference_clock: clock frequency */
 		u32 reference_clock;
+		/** @info.timestamp_base: GT timestamp base */
+		u32 timestamp_base;
 		/**
 		 * @info.engine_mask: mask of engines present on GT. Some of
 		 * them may be reserved in runtime and not available for user.
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 223b95de388c..8c05fd30b7df 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -8,7 +8,9 @@
 #include
 #include
+#include
 #include
+#include
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
@@ -21,6 +23,7 @@
 #include "xe_gsc.h"
 #include "xe_gt.h"
 #include "xe_gt_ccs_mode.h"
+#include "xe_gt_clock.h"
 #include "xe_gt_printk.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_topology.h"
@@ -564,6 +567,33 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	xe_reg_whitelist_process_engine(hwe);
 }
 
+static void adjust_idledly(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	u32 idledly, maxcnt;
+	u32 idledly_units_ps = 8 * gt->info.timestamp_base;
+	u32 maxcnt_units_ns = 640;
+	bool inhibit_switch = 0;
+
+	if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_WA(gt, 16023105232)) {
+		idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
+		maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));
+
+		inhibit_switch = idledly & INHIBIT_SWITCH_UNTIL_PREEMPTED;
+		idledly = REG_FIELD_GET(IDLE_DELAY, idledly);
+		idledly = DIV_ROUND_CLOSEST(idledly * idledly_units_ps, 1000);
+		maxcnt = REG_FIELD_GET(IDLE_WAIT_TIME, maxcnt);
+		maxcnt *= maxcnt_units_ns;
+
+		if (xe_gt_WARN_ON(gt, idledly >= maxcnt || inhibit_switch)) {
+			idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns),
+						    idledly_units_ps);
+			idledly = DIV_ROUND_CLOSEST(idledly, 1000);
+			xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base), idledly);
+		}
+	}
+}
+
 static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 			  enum xe_hw_engine_id id)
 {
@@ -604,6 +634,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
 		gt->usm.reserved_bcs_instance = hwe->instance;
 
+	/* Ensure IDLEDLY is lower than MAXCNT */
+	adjust_idledly(hwe);
+
 	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
 
 err_hwsp:
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index a25afb757f70..24f644c0a673 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -622,6 +622,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
 	},
+	{ XE_RTP_NAME("16023105232"),
+	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000), OR,
+		       GRAPHICS_VERSION_RANGE(2001, 3001)),
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
 };
 
 static const struct xe_rtp_entry_sr lrc_was[] = {
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index e0c5fa460487..0c738af24f7c 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -53,3 +53,5 @@ no_media_l3	MEDIA_VERSION(3000)
 		GRAPHICS_VERSION_RANGE(1270, 1274)
 1508761755	GRAPHICS_VERSION(1255)
 		GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
+16023105232	GRAPHICS_VERSION_RANGE(2001, 3001)
+		MEDIA_VERSION_RANGE(1301, 3000)
-- 2.51.0

From f3e08e98bf408a44182b0e3e946521a0e9b5482f Mon Sep 17 00:00:00 2001
From: Thomas Hellström
Date: Fri, 21 Mar 2025 14:37:09 +0100
Subject: [PATCH 16/16] drm/xe: Simplify pinned bo iteration
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Introduce and use a helper to iterate over the various pinned bo
lists. There are a couple of slight functional changes:

1) GGTT maps are now performed with the bo locked.
2) If the per-bo callback fails, keep the bo on the original list.
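
As a usage illustration of the new helper (a minimal sketch;
example_restore_cb() is a hypothetical callback, the real callers are
visible in the diff below): the helper walks one pinned list and moves
each successfully processed bo to a destination list, invoking the
callback with the bo locked.

	/* Hypothetical per-bo callback; invoked with the bo lock held. */
	static int example_restore_cb(struct xe_bo *bo)
	{
		/*
		 * Act on a single pinned bo here. A non-zero return stops
		 * the walk and leaves this bo on its original list.
		 */
		return 0;
	}

	/*
	 * Walk pinned.evicted, moving each processed bo to
	 * pinned.kernel_bo_present. Source and destination may also be
	 * the same list, in which case the bos simply stay on it.
	 */
	ret = xe_bo_apply_to_pinned(xe, &xe->pinned.evicted,
				    &xe->pinned.kernel_bo_present,
				    example_restore_cb);
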
v2:
- Skip unrelated change in xe_bo.c

Cc: Matthew Auld
Signed-off-by: Thomas Hellström
Reviewed-by: Matthew Auld
Link: https://patchwork.freedesktop.org/patch/msgid/20250321133709.75327-1-thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/xe/xe_bo_evict.c | 209 ++++++++++++-------------------
 1 file changed, 82 insertions(+), 127 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 6a40eedd9db1..1eeb3910450b 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -10,6 +10,44 @@
 #include "xe_ggtt.h"
 #include "xe_tile.h"
 
+typedef int (*xe_pinned_fn)(struct xe_bo *bo);
+
+static int xe_bo_apply_to_pinned(struct xe_device *xe,
+				 struct list_head *pinned_list,
+				 struct list_head *new_list,
+				 const xe_pinned_fn pinned_fn)
+{
+	LIST_HEAD(still_in_list);
+	struct xe_bo *bo;
+	int ret = 0;
+
+	spin_lock(&xe->pinned.lock);
+	while (!ret) {
+		bo = list_first_entry_or_null(pinned_list, typeof(*bo),
+					      pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = pinned_fn(bo);
+		if (ret && pinned_list != new_list) {
+			spin_lock(&xe->pinned.lock);
+			list_move(&bo->pinned_link, pinned_list);
+			spin_unlock(&xe->pinned.lock);
+		}
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, new_list);
+	spin_unlock(&xe->pinned.lock);
+
+	return ret;
+}
+
 /**
  * xe_bo_evict_all - evict all BOs from VRAM
  *
@@ -27,9 +65,7 @@
 int xe_bo_evict_all(struct xe_device *xe)
 {
 	struct ttm_device *bdev = &xe->ttm;
-	struct xe_bo *bo;
 	struct xe_tile *tile;
-	struct list_head still_in_list;
 	u32 mem_type;
 	u8 id;
 	int ret;
@@ -57,34 +93,9 @@ int xe_bo_evict_all(struct xe_device *xe)
 		}
 	}
 
-	/* Pinned user memory in VRAM */
-	INIT_LIST_HEAD(&still_in_list);
-	spin_lock(&xe->pinned.lock);
-	for (;;) {
-		bo = list_first_entry_or_null(&xe->pinned.external_vram,
-					      typeof(*bo), pinned_link);
-		if (!bo)
-			break;
-		xe_bo_get(bo);
-		list_move_tail(&bo->pinned_link, &still_in_list);
-		spin_unlock(&xe->pinned.lock);
-
-		xe_bo_lock(bo, false);
-		ret = xe_bo_evict_pinned(bo);
-		xe_bo_unlock(bo);
-		xe_bo_put(bo);
-		if (ret) {
-			spin_lock(&xe->pinned.lock);
-			list_splice_tail(&still_in_list,
-					 &xe->pinned.external_vram);
-			spin_unlock(&xe->pinned.lock);
-			return ret;
-		}
-
-		spin_lock(&xe->pinned.lock);
-	}
-	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
-	spin_unlock(&xe->pinned.lock);
+	ret = xe_bo_apply_to_pinned(xe, &xe->pinned.external_vram,
+				    &xe->pinned.external_vram,
+				    xe_bo_evict_pinned);
 
 	/*
 	 * Wait for all user BO to be evicted as those evictions depend on the
@@ -93,26 +104,42 @@ int xe_bo_evict_all(struct xe_device *xe)
 	for_each_tile(tile, xe, id)
 		xe_tile_migrate_wait(tile);
 
-	spin_lock(&xe->pinned.lock);
-	for (;;) {
-		bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
-					      typeof(*bo), pinned_link);
-		if (!bo)
-			break;
-		xe_bo_get(bo);
-		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
-		spin_unlock(&xe->pinned.lock);
+	if (ret)
+		return ret;
 
-		xe_bo_lock(bo, false);
-		ret = xe_bo_evict_pinned(bo);
-		xe_bo_unlock(bo);
-		xe_bo_put(bo);
-		if (ret)
-			return ret;
+	return xe_bo_apply_to_pinned(xe, &xe->pinned.kernel_bo_present,
+				     &xe->pinned.evicted,
+				     xe_bo_evict_pinned);
+}
 
-		spin_lock(&xe->pinned.lock);
+static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int ret;
+
+	ret = xe_bo_restore_pinned(bo);
+	if (ret)
+		return ret;
+
+	if (bo->flags & XE_BO_FLAG_GGTT) {
+		struct xe_tile *tile;
+		u8 id;
+
+		for_each_tile(tile, xe_bo_device(bo), id) {
+			if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
+				continue;
+
+			mutex_lock(&tile->mem.ggtt->lock);
+			xe_ggtt_map_bo(tile->mem.ggtt, bo);
+			mutex_unlock(&tile->mem.ggtt->lock);
+		}
 	}
-	}
+
+	/*
+	 * We expect validate to trigger a move to VRAM and our move code
+	 * should set up the iosys map.
+	 */
+	xe_assert(xe, !iosys_map_is_null(&bo->vmap));
 
 	return 0;
 }
@@ -130,54 +157,9 @@
  */
 int xe_bo_restore_kernel(struct xe_device *xe)
 {
-	struct xe_bo *bo;
-	int ret;
-
-	spin_lock(&xe->pinned.lock);
-	for (;;) {
-		bo = list_first_entry_or_null(&xe->pinned.evicted,
-					      typeof(*bo), pinned_link);
-		if (!bo)
-			break;
-		xe_bo_get(bo);
-		list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
-		spin_unlock(&xe->pinned.lock);
-
-		xe_bo_lock(bo, false);
-		ret = xe_bo_restore_pinned(bo);
-		xe_bo_unlock(bo);
-		if (ret) {
-			xe_bo_put(bo);
-			return ret;
-		}
-
-		if (bo->flags & XE_BO_FLAG_GGTT) {
-			struct xe_tile *tile;
-			u8 id;
-
-			for_each_tile(tile, xe, id) {
-				if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
-					continue;
-
-				mutex_lock(&tile->mem.ggtt->lock);
-				xe_ggtt_map_bo(tile->mem.ggtt, bo);
-				mutex_unlock(&tile->mem.ggtt->lock);
-			}
-		}
-
-		/*
-		 * We expect validate to trigger a move VRAM and our move code
-		 * should setup the iosys map.
-		 */
-		xe_assert(xe, !iosys_map_is_null(&bo->vmap));
-
-		xe_bo_put(bo);
-
-		spin_lock(&xe->pinned.lock);
-	}
-	spin_unlock(&xe->pinned.lock);
-
-	return 0;
+	return xe_bo_apply_to_pinned(xe, &xe->pinned.evicted,
+				     &xe->pinned.kernel_bo_present,
+				     xe_bo_restore_and_map_ggtt);
 }
 
 /**
@@ -192,47 +174,20 @@
  */
 int xe_bo_restore_user(struct xe_device *xe)
 {
-	struct xe_bo *bo;
 	struct xe_tile *tile;
-	struct list_head still_in_list;
-	u8 id;
-	int ret;
+	int ret, id;
 
 	if (!IS_DGFX(xe))
 		return 0;
 
 	/* Pinned user memory in VRAM should be validated on resume */
-	INIT_LIST_HEAD(&still_in_list);
-	spin_lock(&xe->pinned.lock);
-	for (;;) {
-		bo = list_first_entry_or_null(&xe->pinned.external_vram,
-					      typeof(*bo), pinned_link);
-		if (!bo)
-			break;
-		list_move_tail(&bo->pinned_link, &still_in_list);
-		xe_bo_get(bo);
-		spin_unlock(&xe->pinned.lock);
-
-		xe_bo_lock(bo, false);
-		ret = xe_bo_restore_pinned(bo);
-		xe_bo_unlock(bo);
-		xe_bo_put(bo);
-		if (ret) {
-			spin_lock(&xe->pinned.lock);
-			list_splice_tail(&still_in_list,
-					 &xe->pinned.external_vram);
-			spin_unlock(&xe->pinned.lock);
-			return ret;
-		}
-
-		spin_lock(&xe->pinned.lock);
-	}
-	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
-	spin_unlock(&xe->pinned.lock);
+	ret = xe_bo_apply_to_pinned(xe, &xe->pinned.external_vram,
+				    &xe->pinned.external_vram,
+				    xe_bo_restore_pinned);
 
 	/* Wait for restore to complete */
 	for_each_tile(tile, xe, id)
 		xe_tile_migrate_wait(tile);
 
-	return 0;
+	return ret;
 }
-- 2.51.0