From 676da6ba5bdc1997910ac1cc901cbf16bc6f6d97 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 14 Mar 2025 06:54:27 -0700 Subject: [PATCH 01/16] drm/xe: Allow to inject error in early probe Allow to test if driver behaves correctly when xe_pcode_probe_early() fails. Note that this is not sufficient for testing survivability mode as it's still required to read the hw to check for errors, which doesn't happen on an injected failure. To complete the early probe coverage, allow injection in the other functions as well: xe_mmio_probe_early() and xe_device_probe_early(). Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20250314-fix-survivability-v5-3-fdb3559ea965@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 1 + drivers/gpu/drm/xe/xe_mmio.c | 1 + drivers/gpu/drm/xe/xe_pcode.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 0e8805f93468..1ffb7d1f6be6 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -735,6 +735,7 @@ int xe_device_probe_early(struct xe_device *xe) return 0; } +ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ static int probe_has_flat_ccs(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 13e06a956ceb..096c38cc51c8 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -138,6 +138,7 @@ int xe_mmio_probe_early(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +ALLOW_ERROR_INJECTION(xe_mmio_probe_early, ERRNO); /* See xe_pci_probe() */ /** * xe_mmio_init() - Initialize an MMIO instance diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9333ce776a6e..cf955b3ed52c 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -7,6 +7,7 @@ #include #include +#include #include @@ -323,3 +324,4 @@ int xe_pcode_probe_early(struct xe_device *xe) { return xe_pcode_ready(xe, false); } +ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */ -- 2.51.0 From 689582882802cd64986c1eb584c9f5184d67f0cf Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sun, 23 Mar 2025 19:41:03 +0800 Subject: [PATCH 02/16] drm/xe: Fix unmet direct dependencies warning WARNING: unmet direct dependencies detected for FB_IOMEM_HELPERS Depends on [n]: HAS_IOMEM [=y] && FB_CORE [=n] Selected by [m]: - DRM_XE_DISPLAY [=y] && HAS_IOMEM [=y] && DRM [=m] && DRM_XE [=m] && DRM_XE [=m]=m [=m] && HAS_IOPORT [=y] DRM_XE_DISPLAY requires FB_IOMEM_HELPERS, but the dependency FB_CORE is missing, selecting FB_IOMEM_HELPERS if DRM_FBDEV_EMULATION is set as other drm drivers. 
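For illustration only, a configuration along the lines of the warning quoted above (values inferred from the
report itself, not taken from any particular defconfig) is enough to trigger it:

    CONFIG_DRM=m
    CONFIG_DRM_XE=m
    CONFIG_DRM_XE_DISPLAY=y
    # CONFIG_FB_CORE is not set
    # CONFIG_DRM_FBDEV_EMULATION is not set

With the select made conditional on DRM_FBDEV_EMULATION, as other drm drivers already do, FB_IOMEM_HELPERS
is only pulled in for configurations that actually use fbdev emulation.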
Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Yue Haibing Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250323114103.1960511-1-yuehaibing@huawei.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 7d7995196702..5c2f459a2925 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -53,7 +53,7 @@ config DRM_XE config DRM_XE_DISPLAY bool "Enable display support" depends on DRM_XE && DRM_XE=m && HAS_IOPORT - select FB_IOMEM_HELPERS + select FB_IOMEM_HELPERS if DRM_FBDEV_EMULATION select I2C select I2C_ALGOBIT default y -- 2.51.0 From ac7759c74a602688c77519f056bd83ab657a73a3 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 14 Mar 2025 11:50:50 +0100 Subject: [PATCH 03/16] drm/xe/hw_engine_class_sysfs: Allow to inject error during probe Allow fault injection in a function used during initialization by xe_hw_engine_class_sysfs_init() so that its error handling can be tested. Signed-off-by: Francois Dugast Reviewed-by: Tejas Upadhyay Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250314105050.636983-1-francois.dugast@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index b53e8d2accdb..e238c0e9fdd0 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -558,6 +558,7 @@ err_object: kobject_put(kobj); return err; } +ALLOW_ERROR_INJECTION(xe_add_hw_engine_class_defaults, ERRNO); /* See xe_pci_probe() */ static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) { -- 2.51.0 From f2d7e9ba184f79521c83bd8199b7baee7a557ee8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 20 Mar 2025 22:15:18 +0100 Subject: [PATCH 04/16] drm/xe: Remove extra spaces in xe_vm.c There are extra spaces in xe_vm_bind_ioctl_validate_bo(), remove those. Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250320211519.632432-1-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_vm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 60303998bd61..793f5bc393c2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3243,7 +3243,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - return -EINVAL; + return -EINVAL; } } @@ -3251,7 +3251,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, if (bo->cpu_caching) { if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - return -EINVAL; + return -EINVAL; } } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { /* @@ -3260,7 +3260,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, * how it was mapped on the CPU. Just assume is it * potentially cached on CPU side. 
*/ - return -EINVAL; + return -EINVAL; } /* If a BO is protected it can only be mapped if the key is still valid */ -- 2.51.0 From 2de3f38fbf89d3cb96d1237aa7a10c0f6480f450 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Tue, 11 Mar 2025 12:47:57 +0530 Subject: [PATCH 05/16] drm/xe: Add support for per-function engine activity Add support for function level engine activity stats. Engine activity stats are enabled when VF's are enabled v2: remove unnecessary initialization move offset to improve code readability (Umesh) remove global for function engine activity (Lucas) v3: fix commit message (Michal) v4: remove enable function parameter fix kernel-doc (Umesh) Cc: Michal Wajdeczko Signed-off-by: Riana Tauro Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20250311071759.2117211-2-riana.tauro@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/abi/guc_actions_abi.h | 1 + drivers/gpu/drm/xe/xe_guc_engine_activity.c | 200 +++++++++++++++--- drivers/gpu/drm/xe/xe_guc_engine_activity.h | 7 +- .../gpu/drm/xe/xe_guc_engine_activity_types.h | 12 +- drivers/gpu/drm/xe/xe_pmu.c | 5 +- 5 files changed, 192 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index ec516e838ee8..448afb86e05c 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -141,6 +141,7 @@ enum xe_guc_action { XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, + XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c index 2a457dcf31d5..b96fea78df8b 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -17,36 +17,61 @@ #include "xe_hw_engine.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_sriov_pf_helpers.h" #include "xe_trace_guc.h" #define TOTAL_QUANTA 0x8000 -static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe) +static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int index) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + struct engine_activity_buffer *buffer; u16 guc_class = xe_engine_class_to_guc_class(hwe->class); size_t offset; - offset = offsetof(struct guc_engine_activity_data, + if (engine_activity->num_functions) { + buffer = &engine_activity->function_buffer; + offset = sizeof(struct guc_engine_activity_data) * index; + } else { + buffer = &engine_activity->device_buffer; + offset = 0; + } + + offset += offsetof(struct guc_engine_activity_data, engine_activity[guc_class][hwe->logical_instance]); return IOSYS_MAP_INIT_OFFSET(&buffer->activity_bo->vmap, offset); } -static struct iosys_map engine_metadata_map(struct xe_guc *guc) +static struct iosys_map engine_metadata_map(struct xe_guc *guc, + unsigned int index) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + struct engine_activity_buffer *buffer; + size_t offset; - return buffer->metadata_bo->vmap; + if 
(engine_activity->num_functions) { + buffer = &engine_activity->function_buffer; + offset = sizeof(struct guc_engine_activity_metadata) * index; + } else { + buffer = &engine_activity->device_buffer; + offset = 0; + } + + return IOSYS_MAP_INIT_OFFSET(&buffer->metadata_bo->vmap, offset); } static int allocate_engine_activity_group(struct xe_guc *guc) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; struct xe_device *xe = guc_to_xe(guc); - u32 num_activity_group = 1; /* Will be modified for VF */ + u32 num_activity_group; + + /* + * An additional activity group is allocated for PF + */ + num_activity_group = IS_SRIOV_PF(xe) ? xe_sriov_pf_get_totalvfs(xe) + 1 : 1; engine_activity->eag = drmm_kcalloc(&xe->drm, num_activity_group, sizeof(struct engine_activity_group), GFP_KERNEL); @@ -60,10 +85,11 @@ static int allocate_engine_activity_group(struct xe_guc *guc) } static int allocate_engine_activity_buffers(struct xe_guc *guc, - struct engine_activity_buffer *buffer) + struct engine_activity_buffer *buffer, + int count) { - u32 metadata_size = sizeof(struct guc_engine_activity_metadata); - u32 size = sizeof(struct guc_engine_activity_data); + u32 metadata_size = sizeof(struct guc_engine_activity_metadata) * count; + u32 size = sizeof(struct guc_engine_activity_data) * count; struct xe_gt *gt = guc_to_gt(guc); struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo, *metadata_bo; @@ -118,10 +144,11 @@ static bool is_engine_activity_supported(struct xe_guc *guc) return true; } -static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe) +static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe, + unsigned int index) { struct xe_guc *guc = &hwe->gt->uc.guc; - struct engine_activity_group *eag = &guc->engine_activity.eag[0]; + struct engine_activity_group *eag = &guc->engine_activity.eag[index]; u16 guc_class = xe_engine_class_to_guc_class(hwe->class); return &eag->engine[guc_class][hwe->logical_instance]; @@ -138,9 +165,10 @@ static u64 cpu_ns_to_guc_tsc_tick(ktime_t ns, u32 freq) #define read_metadata_record(xe_, map_, field_) \ xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity_metadata, field_) -static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int index) { - struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index); struct guc_engine_activity *cached_activity = &ea->activity; struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; @@ -151,8 +179,8 @@ static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) u64 active_ticks, gpm_ts; u16 change_num; - activity_map = engine_activity_map(guc, hwe); - metadata_map = engine_metadata_map(guc); + activity_map = engine_activity_map(guc, hwe, index); + metadata_map = engine_metadata_map(guc, index); global_change_num = read_metadata_record(xe, &metadata_map, global_change_num); /* GuC has not initialized activity data yet, return 0 */ @@ -194,9 +222,9 @@ update: return ea->total + ea->active; } -static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, unsigned int index) { - struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct engine_activity 
*ea = hw_engine_to_engine_activity(hwe, index); struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; struct guc_engine_activity *cached_activity = &ea->activity; struct iosys_map activity_map, metadata_map; @@ -205,8 +233,8 @@ static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) u64 numerator; u16 quanta_ratio; - activity_map = engine_activity_map(guc, hwe); - metadata_map = engine_metadata_map(guc); + activity_map = engine_activity_map(guc, hwe, index); + metadata_map = engine_metadata_map(guc, index); if (!cached_metadata->guc_tsc_frequency_hz) cached_metadata->guc_tsc_frequency_hz = read_metadata_record(xe, &metadata_map, @@ -245,10 +273,35 @@ static int enable_engine_activity_stats(struct xe_guc *guc) return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); } -static void engine_activity_set_cpu_ts(struct xe_guc *guc) +static int enable_function_engine_activity_stats(struct xe_guc *guc, bool enable) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_group *eag = &engine_activity->eag[0]; + u32 metadata_ggtt_addr = 0, ggtt_addr = 0, num_functions = 0; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + u32 action[6]; + int len = 0; + + if (enable) { + metadata_ggtt_addr = xe_bo_ggtt_addr(buffer->metadata_bo); + ggtt_addr = xe_bo_ggtt_addr(buffer->activity_bo); + num_functions = engine_activity->num_functions; + } + + action[len++] = XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER; + action[len++] = num_functions; + action[len++] = metadata_ggtt_addr; + action[len++] = 0; + action[len++] = ggtt_addr; + action[len++] = 0; + + /* Blocking here to ensure the buffers are ready before reading them */ + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static void engine_activity_set_cpu_ts(struct xe_guc *guc, unsigned int index) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_group *eag = &engine_activity->eag[index]; int i, j; for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) @@ -265,34 +318,106 @@ static u32 gpm_timestamp_shift(struct xe_gt *gt) return 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); } +static bool is_function_valid(struct xe_guc *guc, unsigned int fn_id) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + + if (!IS_SRIOV_PF(xe) && fn_id) + return false; + + if (engine_activity->num_functions && fn_id >= engine_activity->num_functions) + return false; + + return true; +} + +static int engine_activity_disable_function_stats(struct xe_guc *guc) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + int ret; + + if (!engine_activity->num_functions) + return 0; + + ret = enable_function_engine_activity_stats(guc, false); + if (ret) + return ret; + + free_engine_activity_buffers(buffer); + engine_activity->num_functions = 0; + + return 0; +} + +static int engine_activity_enable_function_stats(struct xe_guc *guc, int num_vfs) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + int ret, i; + + if (!num_vfs) + return 0; + + /* This includes 1 PF and num_vfs */ + engine_activity->num_functions = num_vfs + 1; + + ret = allocate_engine_activity_buffers(guc, buffer, engine_activity->num_functions); + if 
(ret) + return ret; + + ret = enable_function_engine_activity_stats(guc, true); + if (ret) { + free_engine_activity_buffers(buffer); + engine_activity->num_functions = 0; + return ret; + } + + for (i = 0; i < engine_activity->num_functions; i++) + engine_activity_set_cpu_ts(guc, i + 1); + + return 0; +} + /** * xe_guc_engine_activity_active_ticks - Get engine active ticks * @guc: The GuC object * @hwe: The hw_engine object + * @fn_id: function id to report on * * Return: accumulated ticks @hwe was active since engine activity stats were enabled. */ -u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id) { if (!xe_guc_engine_activity_supported(guc)) return 0; - return get_engine_active_ticks(guc, hwe); + if (!is_function_valid(guc, fn_id)) + return 0; + + return get_engine_active_ticks(guc, hwe, fn_id); } /** * xe_guc_engine_activity_total_ticks - Get engine total ticks * @guc: The GuC object * @hwe: The hw_engine object + * @fn_id: function id to report on * * Return: accumulated quanta of ticks allocated for the engine */ -u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id) { if (!xe_guc_engine_activity_supported(guc)) return 0; - return get_engine_total_ticks(guc, hwe); + if (!is_function_valid(guc, fn_id)) + return 0; + + return get_engine_total_ticks(guc, hwe, fn_id); } /** @@ -310,6 +435,25 @@ bool xe_guc_engine_activity_supported(struct xe_guc *guc) return engine_activity->supported; } +/** + * xe_guc_engine_activity_function_stats - Enable/Disable per-function engine activity stats + * @guc: The GuC object + * @num_vfs: number of vfs + * @enable: true to enable, false otherwise + * + * Return: 0 on success, negative error code otherwise + */ +int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable) +{ + if (!xe_guc_engine_activity_supported(guc)) + return 0; + + if (enable) + return engine_activity_enable_function_stats(guc, num_vfs); + + return engine_activity_disable_function_stats(guc); +} + /** * xe_guc_engine_activity_enable_stats - Enable engine activity stats * @guc: The GuC object @@ -327,7 +471,7 @@ void xe_guc_engine_activity_enable_stats(struct xe_guc *guc) if (ret) xe_gt_err(guc_to_gt(guc), "failed to enable activity stats%d\n", ret); else - engine_activity_set_cpu_ts(guc); + engine_activity_set_cpu_ts(guc, 0); } static void engine_activity_fini(void *arg) @@ -360,7 +504,7 @@ int xe_guc_engine_activity_init(struct xe_guc *guc) return ret; } - ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer); + ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer, 1); if (ret) { xe_gt_err(gt, "failed to allocate engine activity buffers (%pe)\n", ERR_PTR(ret)); return ret; diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.h b/drivers/gpu/drm/xe/xe_guc_engine_activity.h index a042d4cb404c..b32926c2d208 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.h +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.h @@ -14,6 +14,9 @@ struct xe_guc; int xe_guc_engine_activity_init(struct xe_guc *guc); bool xe_guc_engine_activity_supported(struct xe_guc *guc); void xe_guc_engine_activity_enable_stats(struct xe_guc *guc); -u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe); -u64 
xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe); +int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable); +u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id); +u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h index 5cdd034b6b70..48f69ddefa36 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h @@ -79,14 +79,24 @@ struct xe_guc_engine_activity { /** @num_activity_group: number of activity groups */ u32 num_activity_group; + /** @num_functions: number of functions */ + u32 num_functions; + /** @supported: indicates support for engine activity stats */ bool supported; - /** @eag: holds the device level engine activity data */ + /** + * @eag: holds the device level engine activity data in native mode. + * In SRIOV mode, points to an array with entries which holds the engine + * activity data for PF and VF's + */ struct engine_activity_group *eag; /** @device_buffer: buffer object for global engine activity */ struct engine_activity_buffer device_buffer; + + /** @function_buffer: buffer object for per-function engine activity */ + struct engine_activity_buffer function_buffer; }; #endif diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 4f62a6e515d6..f8f6ebb7c9c6 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -245,10 +245,11 @@ static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event) u64 val = 0; hwe = event_to_hwe(event); + if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) - val = xe_guc_engine_activity_active_ticks(>->uc.guc, hwe); + val = xe_guc_engine_activity_active_ticks(>->uc.guc, hwe, 0); else - val = xe_guc_engine_activity_total_ticks(>->uc.guc, hwe); + val = xe_guc_engine_activity_total_ticks(>->uc.guc, hwe, 0); return val; } -- 2.51.0 From 8a4339fe2422a8080e45b76b4938827c807dcd52 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Tue, 11 Mar 2025 12:47:58 +0530 Subject: [PATCH 06/16] drm/xe/xe_pmu: Add PMU support for per-function engine activity stats Add PMU support for per-function engine activity stats. per-function engine activity is enabled when VF's are enabled. 
If 2 VF's are enabled, then the applicable function values are 0 - PF engine activity 1 - VF1 engine activity 2 - VF2 engine activity This can be read from perf tool as shown below ./perf stat -e xe_/engine-active-ticks,gt=0,engine_class=0, engine_instance=0,function=1/ -I 1000 v2: fix documentation (Umesh) remove global for functions (Lucas, Michal) v3: fix commit message move function_id checks to same place (Michal) v4: fix comment (Umesh) Cc: Michal Wajdeczko Signed-off-by: Riana Tauro Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20250311071759.2117211-3-riana.tauro@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pmu.c | 39 +++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index f8f6ebb7c9c6..fde4222675fd 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -13,6 +13,7 @@ #include "xe_hw_engine.h" #include "xe_pm.h" #include "xe_pmu.h" +#include "xe_sriov_pf_helpers.h" /** * DOC: Xe PMU (Performance Monitoring Unit) @@ -32,9 +33,10 @@ * gt[60:63] Selects gt for the event * engine_class[20:27] Selects engine-class for event * engine_instance[12:19] Selects the engine-instance for the event + * function[44:59] Selects the function of the event (SRIOV enabled) * * For engine specific events (engine-*), gt, engine_class and engine_instance parameters must be - * set as populated by DRM_XE_DEVICE_QUERY_ENGINES. + * set as populated by DRM_XE_DEVICE_QUERY_ENGINES and function if SRIOV is enabled. * * For gt specific events (gt-*) gt parameter must be passed. All other parameters will be 0. * @@ -49,6 +51,7 @@ */ #define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60) +#define XE_PMU_EVENT_FUNCTION_MASK GENMASK_ULL(59, 44) #define XE_PMU_EVENT_ENGINE_CLASS_MASK GENMASK_ULL(27, 20) #define XE_PMU_EVENT_ENGINE_INSTANCE_MASK GENMASK_ULL(19, 12) #define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0) @@ -58,6 +61,11 @@ static unsigned int config_to_event_id(u64 config) return FIELD_GET(XE_PMU_EVENT_ID_MASK, config); } +static unsigned int config_to_function_id(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_FUNCTION_MASK, config); +} + static unsigned int config_to_engine_class(u64 config) { return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config); @@ -151,7 +159,7 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt, static bool event_param_valid(struct perf_event *event) { struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); - unsigned int engine_class, engine_instance; + unsigned int engine_class, engine_instance, function_id; u64 config = event->attr.config; struct xe_gt *gt; @@ -161,16 +169,26 @@ static bool event_param_valid(struct perf_event *event) engine_class = config_to_engine_class(config); engine_instance = config_to_engine_instance(config); + function_id = config_to_function_id(config); switch (config_to_event_id(config)) { case XE_PMU_EVENT_GT_C6_RESIDENCY: - if (engine_class || engine_instance) + if (engine_class || engine_instance || function_id) return false; break; case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS: case XE_PMU_EVENT_ENGINE_TOTAL_TICKS: if (!event_to_hwe(event)) return false; + + /* PF(0) and total vfs when SRIOV is enabled */ + if (IS_SRIOV_PF(xe)) { + if (function_id > xe_sriov_pf_get_totalvfs(xe)) + return false; + } else if (function_id) { + return false; + } + break; } @@ -242,14 +260,17 @@ static int xe_pmu_event_init(struct perf_event *event) static u64 
read_engine_events(struct xe_gt *gt, struct perf_event *event) { struct xe_hw_engine *hwe; - u64 val = 0; + unsigned int function_id; + u64 config, val = 0; - hwe = event_to_hwe(event); + config = event->attr.config; + function_id = config_to_function_id(config); - if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) - val = xe_guc_engine_activity_active_ticks(>->uc.guc, hwe, 0); + hwe = event_to_hwe(event); + if (config_to_event_id(config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) + val = xe_guc_engine_activity_active_ticks(>->uc.guc, hwe, function_id); else - val = xe_guc_engine_activity_total_ticks(>->uc.guc, hwe, 0); + val = xe_guc_engine_activity_total_ticks(>->uc.guc, hwe, function_id); return val; } @@ -352,6 +373,7 @@ static void xe_pmu_event_del(struct perf_event *event, int flags) } PMU_FORMAT_ATTR(gt, "config:60-63"); +PMU_FORMAT_ATTR(function, "config:44-59"); PMU_FORMAT_ATTR(engine_class, "config:20-27"); PMU_FORMAT_ATTR(engine_instance, "config:12-19"); PMU_FORMAT_ATTR(event, "config:0-11"); @@ -360,6 +382,7 @@ static struct attribute *pmu_format_attrs[] = { &format_attr_event.attr, &format_attr_engine_class.attr, &format_attr_engine_instance.attr, + &format_attr_function.attr, &format_attr_gt.attr, NULL, }; -- 2.51.0 From bf6cb06c0e05ff6fb5bfab3e5d7c22e4617e23e0 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Tue, 11 Mar 2025 12:47:59 +0530 Subject: [PATCH 07/16] drm/xe/pf: Enable per-function engine activity stats Enable per-function engine activity stats when VF's are enabled and disable when VF's are disabled v2: fix commit message remove reset stats from pf config (Michal) Cc: Michal Wajdeczko Signed-off-by: Riana Tauro Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20250311071759.2117211-4-riana.tauro@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci_sriov.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 09ee8a06fe2e..d69b6b2a3061 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -7,6 +7,7 @@ #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" +#include "xe_guc_engine_activity.h" #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -111,6 +112,20 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs) } } +static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, bool enable) +{ + struct xe_gt *gt; + unsigned int id; + int ret = 0; + + for_each_gt(gt, xe, id) { + ret = xe_guc_engine_activity_function_stats(>->uc.guc, num_vfs, enable); + if (ret) + xe_sriov_info(xe, "Failed to %s engine activity function stats (%pe)\n", + str_enable_disable(enable), ERR_PTR(ret)); + } +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -145,6 +160,9 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); + + pf_engine_activity_stats(xe, num_vfs, true); + return num_vfs; failed: @@ -168,6 +186,8 @@ static int pf_disable_vfs(struct xe_device *xe) if (!num_vfs) return 0; + pf_engine_activity_stats(xe, num_vfs, false); + pci_disable_sriov(pdev); pf_reset_vfs(xe, num_vfs); -- 2.51.0 From 6fe653f82402fafecb913375e74f386f52c10938 Mon Sep 17 00:00:00 2001 From: Nakshtra Goyal Date: Thu, 27 Feb 2025 15:53:39 +0530 Subject: [PATCH 
08/16] drm/xe: Add fault injection for xe_oa_alloc_regs Add fault injection for xe_oa_alloc_regs to allow it to fail while executing xe_oa_add_config_ioctl(). This need to be added as it cannot be reached by injecting error through IOCTL arguments. Signed-off-by: Nakshtra Goyal Reviewed-by: Francois Dugast Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250227102339.2859726-1-nakshtra.goyal@intel.com Signed-off-by: Himal Prasad Ghimiray --- drivers/gpu/drm/xe/xe_oa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6f185632da14..75655b1f17f9 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2221,6 +2221,7 @@ addr_err: kfree(oa_regs); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_oa_alloc_regs, ERRNO); static ssize_t show_dynamic_id(struct kobject *kobj, struct kobj_attribute *attr, -- 2.51.0 From cca9734ebe55f6af11ce8d57ca1afdc4d158c808 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 25 Mar 2025 15:47:10 -0700 Subject: [PATCH 09/16] drm/xe/bmg: Add one additional PCI ID One additional BMG PCI ID has been added to the spec; make sure our driver recognizes devices with this ID properly. Bspec: 68090 Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Clint Taylor Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250325224709.4073080-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- include/drm/intel/pciids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 4736ea525048..d212848d07f3 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -850,6 +850,7 @@ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE210, ## __VA_ARGS__), \ + MACRO__(0xE211, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__), \ MACRO__(0xE215, ## __VA_ARGS__), \ MACRO__(0xE216, ## __VA_ARGS__) -- 2.51.0 From 6c55404d4f7306889eca9c333066bb4bdc2a4a7c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 26 Mar 2025 09:05:47 +0100 Subject: [PATCH 10/16] drm/xe: Introduce CONFIG_DRM_XE_GPUSVM MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Don't rely on CONFIG_DRM_GPUSVM because other drivers may enable it causing us to compile in SVM support unintentionally. Also take the opportunity to leave more code out of compilation if !CONFIG_DRM_XE_GPUSVM and !CONFIG_DRM_XE_DEVMEM_MIRROR v3: - Fixes for compilation errors on 32-bit. This changes the Kconfig logic a bit. 
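For reference, an illustrative configuration (not from any particular defconfig) with the new split looks like:

    CONFIG_DRM_XE=m
    CONFIG_DRM_XE_GPUSVM=y          # selects DRM_GPUSVM for xe only
    CONFIG_DRM_XE_DEVMEM_MIRROR=y   # now gated on DRM_XE_GPUSVM

so xe no longer compiles in SVM support merely because another driver happened to enable DRM_GPUSVM.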
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250326080551.40201-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/Kconfig | 16 +++++- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 6 ++- drivers/gpu/drm/xe/xe_migrate.c | 3 ++ drivers/gpu/drm/xe/xe_pt.c | 6 +++ drivers/gpu/drm/xe/xe_query.c | 2 +- drivers/gpu/drm/xe/xe_svm.c | 15 ++++++ drivers/gpu/drm/xe/xe_svm.h | 73 ++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_vm.c | 2 +- 9 files changed, 98 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 5c2f459a2925..9bce047901b2 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -39,7 +39,6 @@ config DRM_XE select DRM_TTM_HELPER select DRM_EXEC select DRM_GPUVM - select DRM_GPUSVM if !UML && DEVICE_PRIVATE select DRM_SCHED select MMU_NOTIFIER select WANT_DEV_COREDUMP @@ -74,9 +73,22 @@ config DRM_XE_DP_TUNNEL If in doubt say "Y". +config DRM_XE_GPUSVM + bool "Enable CPU to GPU address mirroring" + depends on DRM_XE + depends on !UML + depends on DEVICE_PRIVATE + default y + select DRM_GPUSVM + help + Enable this option if you want support for CPU to GPU address + mirroring. + + If in doubut say "Y". + config DRM_XE_DEVMEM_MIRROR bool "Enable device memory mirror" - depends on DRM_XE + depends on DRM_XE_GPUSVM select GET_FREE_REGION default y help diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 9699b08585f7..e4fec90bab55 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -125,7 +125,7 @@ xe-y += xe_bb.o \ xe_wopcm.o xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o -xe-$(CONFIG_DRM_GPUSVM) += xe_svm.o +xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 1334174388af..4472bf849f2b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -107,6 +107,9 @@ struct xe_vram_region { resource_size_t actual_physical_size; /** @mapping: pointer to VRAM mappable space */ void __iomem *mapping; + /** @ttm: VRAM TTM manager */ + struct xe_ttm_vram_mgr ttm; +#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) /** @pagemap: Used to remap device memory as ZONE_DEVICE */ struct dev_pagemap pagemap; /** @@ -120,8 +123,7 @@ struct xe_vram_region { * This is generated when remap device memory as ZONE_DEVICE */ resource_size_t hpa_base; - /** @ttm: VRAM TTM manager */ - struct xe_ttm_vram_mgr ttm; +#endif }; /** diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index df4282c71bf0..d364c9f458e7 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1544,6 +1544,7 @@ void xe_migrate_wait(struct xe_migrate *m) dma_fence_wait(m->fence, false); } +#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) static u32 pte_update_cmd_size(u64 size) { u32 num_dword; @@ -1719,6 +1720,8 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, XE_MIGRATE_COPY_TO_SRAM); } +#endif + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" #endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ffaf0d02dc7d..9e719535a3bb 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1420,6 +1420,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) return err; } +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) static 
int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_vm *vm = pt_update->vops->vm; @@ -1453,6 +1454,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) return 0; } +#endif struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; @@ -2257,11 +2259,15 @@ static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { .pre_commit = xe_pt_userptr_pre_commit, }; +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) static const struct xe_migrate_pt_update_ops svm_migrate_ops = { .populate = xe_vm_populate_pgtable, .clear = xe_migrate_clear_pgtable_callback, .pre_commit = xe_pt_svm_pre_commit, }; +#else +static const struct xe_migrate_pt_update_ops svm_migrate_ops; +#endif /** * xe_pt_update_ops_run() - Run PT update operations diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 5e65830dad25..2dbf4066d86f 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -340,7 +340,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe_device_get_root_tile(xe)->mem.vram.usable_size) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; - if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_GPUSVM)) + if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR; config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 08617a62ab07..52613dd8573a 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -340,6 +340,8 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } +#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) + static struct xe_vram_region *page_to_vr(struct page *page) { return container_of(page->pgmap, struct xe_vram_region, pagemap); @@ -578,6 +580,8 @@ static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = { .copy_to_ram = xe_svm_copy_to_ram, }; +#endif + static const struct drm_gpusvm_ops gpusvm_ops = { .range_alloc = xe_svm_range_alloc, .range_free = xe_svm_range_free, @@ -651,6 +655,7 @@ static bool xe_svm_range_is_valid(struct xe_svm_range *range, return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id); } +#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) { return &tile->mem.vram; @@ -709,6 +714,15 @@ unlock: return err; } +#else +static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, + struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + return -EOPNOTSUPP; +} +#endif + /** * xe_svm_handle_pagefault() - SVM handle page fault @@ -867,6 +881,7 @@ int xe_svm_bo_evict(struct xe_bo *bo) } #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) + static struct drm_pagemap_device_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, struct device *dev, diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 93442738666e..3d441eb1f7ea 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -6,6 +6,8 @@ #ifndef _XE_SVM_H_ #define _XE_SVM_H_ +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) + #include #include @@ -44,7 +46,6 @@ struct xe_svm_range { u8 skip_migrate :1; }; -#if IS_ENABLED(CONFIG_DRM_GPUSVM) /** * xe_svm_range_pages_valid() - SVM range pages valid * @range: SVM range @@ -73,7 +74,50 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); int xe_svm_bo_evict(struct xe_bo *bo); void 
xe_svm_range_debug(struct xe_svm_range *range, const char *operation); + +/** + * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping + * @range: SVM range + * + * Return: True if SVM range has a DMA mapping, False otherwise + */ +static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) +{ + lockdep_assert_held(&range->base.gpusvm->notifier_lock); + return range->base.flags.has_dma_mapping; +} + +#define xe_svm_assert_in_notifier(vm__) \ + lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_lock(vm__) \ + drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) + +#define xe_svm_notifier_unlock(vm__) \ + drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) + #else +#include + +struct drm_pagemap_device_addr; +struct xe_bo; +struct xe_gt; +struct xe_vm; +struct xe_vma; +struct xe_tile; +struct xe_vram_region; + +#define XE_INTERCONNECT_VRAM 1 + +struct xe_svm_range { + struct { + struct interval_tree_node itree; + const struct drm_pagemap_device_addr *dma_addr; + } base; + u32 tile_present; + u32 tile_invalidated; +}; + static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range) { return false; @@ -125,27 +169,16 @@ static inline void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } -#endif -/** - * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping - * @range: SVM range - * - * Return: True if SVM range has a DMA mapping, False otherwise - */ -static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) +#define xe_svm_assert_in_notifier(...) do {} while (0) +#define xe_svm_range_has_dma_mapping(...) false + +static inline void xe_svm_notifier_lock(struct xe_vm *vm) { - lockdep_assert_held(&range->base.gpusvm->notifier_lock); - return range->base.flags.has_dma_mapping; } -#define xe_svm_assert_in_notifier(vm__) \ - lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) - -#define xe_svm_notifier_lock(vm__) \ - drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) - -#define xe_svm_notifier_unlock(vm__) \ - drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) - +static inline void xe_svm_notifier_unlock(struct xe_vm *vm) +{ +} +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 793f5bc393c2..864266e38aa7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3109,7 +3109,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && (!xe_vm_in_fault_mode(vm) || - !IS_ENABLED(CONFIG_DRM_GPUSVM)))) { + !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { err = -EINVAL; goto free_bind_ops; } -- 2.51.0 From c9db07cab766b665c8fa1184649cef452f448dc8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 26 Mar 2025 09:05:48 +0100 Subject: [PATCH 11/16] drm/xe/svm: Fix a potential bo UAF MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If drm_gpusvm_migrate_to_devmem() succeeds, if a cpu access happens to the range the bo may be freed before xe_bo_unlock(), causing a UAF. Since the reference is transferred, use xe_svm_devmem_release() to release the reference on drm_gpusvm_migrate_to_devmem() failure, and hold a local reference to protect the UAF. 
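A minimal sketch of the resulting ordering (identifiers as in the diff below, surrounding code omitted):

	/*
	 * Once migration succeeds, a concurrent CPU access to the range may
	 * already free the bo, so hold a local reference across the unlock.
	 */
	xe_bo_get(bo);
	err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
					   &bo->devmem_allocation, ctx);
	if (err)
		/* The creation ref was transferred; release it via the devmem path. */
		xe_svm_devmem_release(&bo->devmem_allocation);

	xe_bo_unlock(bo);
	xe_bo_put(bo);	/* drop the local reference only after unlocking */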
Fixes: 2f118c949160 ("drm/xe: Add SVM VRAM migration") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250326080551.40201-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_svm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 52613dd8573a..c7424c824a14 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -702,11 +702,14 @@ retry: list_for_each_entry(block, blocks, link) block->private = vr; + xe_bo_get(bo); err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, &bo->devmem_allocation, ctx); - xe_bo_unlock(bo); if (err) - xe_bo_put(bo); /* Creation ref */ + xe_svm_devmem_release(&bo->devmem_allocation); + + xe_bo_unlock(bo); + xe_bo_put(bo); unlock: mmap_read_unlock(mm); -- 2.51.0 From 3cbb651117ff8a23ab22de595754e9d66b533ab9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 26 Mar 2025 09:05:49 +0100 Subject: [PATCH 12/16] drm/xe/bo: Add a bo remove callback MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit On device unbind, migrate exported bos, including pagemap bos to system. This allows importers to take proper action without disruption. In particular, SVM clients on remote devices may continue as if nothing happened, and can chose a different placement. The evict_flags() placement is chosen in such a way that bos that aren't exported are purged. For pinned bos, we unmap DMA, but their pages are not freed yet since we can't be 100% sure they are not accessed. All pinned external bos (not just the VRAM ones) are put on the pinned.external list with this patch. But this only affects the xe_bo_pci_dev_remove_pinned() function since !VRAM bos are ignored by the suspend / resume functionality. As a follow-up we could look at removing the suspend / resume iteration over pinned external bos since we currently don't allow pinning external bos in VRAM, and other external bos don't need any special treatment at suspend / resume. v2: - Address review comments. (Matthew Auld). v3: - Don't introduce an external_evicted list (Matthew Auld) - Add a discussion around suspend / resume behaviour to the commit message. - Formatting fixes. v4: - Move dma-unmaps of pinned kernel bos to a dev managed callback to give subsystems using these bos a chance to clean them up. 
(Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250326080551.40201-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 54 +++++++++++++++--- drivers/gpu/drm/xe/xe_bo.h | 2 + drivers/gpu/drm/xe/xe_bo_evict.c | 84 ++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_bo_evict.h | 3 + drivers/gpu/drm/xe/xe_device.c | 10 ++-- drivers/gpu/drm/xe/xe_device_types.h | 4 +- 6 files changed, 140 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 64f9c936eea0..9d043d2c30fd 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -55,6 +55,8 @@ static struct ttm_placement sys_placement = { .placement = &sys_placement_flags, }; +static struct ttm_placement purge_placement; + static const struct ttm_place tt_placement_flags[] = { { .fpfn = 0, @@ -281,6 +283,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, static void xe_evict_flags(struct ttm_buffer_object *tbo, struct ttm_placement *placement) { + struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm); + bool device_unplugged = drm_dev_is_unplugged(&xe->drm); struct xe_bo *bo; if (!xe_bo_is_xe_bo(tbo)) { @@ -290,7 +294,7 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, return; } - *placement = sys_placement; + *placement = device_unplugged ? purge_placement : sys_placement; return; } @@ -300,6 +304,11 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, return; } + if (device_unplugged && !tbo->base.dma_buf) { + *placement = purge_placement; + return; + } + /* * For xe, sg bos that are evicted to system just triggers a * rebind of the sg list upon subsequent validation to XE_PL_TT. @@ -657,11 +666,20 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm); struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + bool device_unplugged = drm_dev_is_unplugged(&xe->drm); struct sg_table *sg; xe_assert(xe, attach); xe_assert(xe, ttm_bo->ttm); + if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM && + ttm_bo->sg) { + dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + } + if (new_res->mem_type == XE_PL_SYSTEM) goto out; @@ -1224,6 +1242,31 @@ err_res_free: return ret; } +int xe_bo_dma_unmap_pinned(struct xe_bo *bo) +{ + struct ttm_buffer_object *ttm_bo = &bo->ttm; + struct ttm_tt *tt = ttm_bo->ttm; + + if (tt) { + struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm); + + if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) { + dma_buf_unmap_attachment(ttm_bo->base.import_attach, + ttm_bo->sg, + DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + xe_tt->sg = NULL; + } else if (xe_tt->sg) { + dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } + } + + return 0; +} + static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo, unsigned long page_offset) { @@ -2102,12 +2145,9 @@ int xe_bo_pin_external(struct xe_bo *bo) if (err) return err; - if (xe_bo_is_vram(bo)) { - spin_lock(&xe->pinned.lock); - list_add_tail(&bo->pinned_link, - &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - } + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, &xe->pinned.external); + spin_unlock(&xe->pinned.lock); } ttm_bo_pin(&bo->ttm); diff --git 
a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index ec3e4446d027..05479240bf75 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -276,6 +276,8 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc); int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); +int xe_bo_dma_unmap_pinned(struct xe_bo *bo); + extern const struct ttm_device_funcs xe_ttm_funcs; extern const char *const xe_mem_type_to_name[]; diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 1eeb3910450b..a1f0661e7b0c 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -93,8 +93,8 @@ int xe_bo_evict_all(struct xe_device *xe) } } - ret = xe_bo_apply_to_pinned(xe, &xe->pinned.external_vram, - &xe->pinned.external_vram, + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.external, + &xe->pinned.external, xe_bo_evict_pinned); /* @@ -181,8 +181,8 @@ int xe_bo_restore_user(struct xe_device *xe) return 0; /* Pinned user memory in VRAM should be validated on resume */ - ret = xe_bo_apply_to_pinned(xe, &xe->pinned.external_vram, - &xe->pinned.external_vram, + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.external, + &xe->pinned.external, xe_bo_restore_pinned); /* Wait for restore to complete */ @@ -191,3 +191,79 @@ int xe_bo_restore_user(struct xe_device *xe) return ret; } + +static void xe_bo_pci_dev_remove_pinned(struct xe_device *xe) +{ + struct xe_tile *tile; + unsigned int id; + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.external, + &xe->pinned.external, + xe_bo_dma_unmap_pinned); + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); +} + +/** + * xe_bo_pci_dev_remove_all() - Handle bos when the pci_device is about to be removed + * @xe: The xe device. + * + * On pci_device removal we need to drop all dma mappings and move + * the data of exported bos out to system. This includes SVM bos and + * exported dma-buf bos. This is done by evicting all bos, but + * the evict placement in xe_evict_flags() is chosen such that all + * bos except those mentioned are purged, and thus their memory + * is released. + * + * For pinned bos, we're unmapping dma. + */ +void xe_bo_pci_dev_remove_all(struct xe_device *xe) +{ + unsigned int mem_type; + + /* + * Move pagemap bos and exported dma-buf to system, and + * purge everything else. + */ + for (mem_type = XE_PL_VRAM1; mem_type >= XE_PL_TT; --mem_type) { + struct ttm_resource_manager *man = + ttm_manager_type(&xe->ttm, mem_type); + + if (man) { + int ret = ttm_resource_manager_evict_all(&xe->ttm, man); + + drm_WARN_ON(&xe->drm, ret); + } + } + + xe_bo_pci_dev_remove_pinned(xe); +} + +static void xe_bo_pinned_fini(void *arg) +{ + struct xe_device *xe = arg; + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.kernel_bo_present, + &xe->pinned.kernel_bo_present, + xe_bo_dma_unmap_pinned); +} + +/** + * xe_bo_pinned_init() - Initialize pinned bo tracking + * @xe: The xe device. + * + * Initializes the lists and locks required for pinned bo + * tracking and registers a callback to dma-unmap + * any remaining pinned bos on pci device removal. + * + * Return: %0 on success, negative error code on error. 
+ */ +int xe_bo_pinned_init(struct xe_device *xe) +{ + spin_lock_init(&xe->pinned.lock); + INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); + INIT_LIST_HEAD(&xe->pinned.external); + INIT_LIST_HEAD(&xe->pinned.evicted); + + return devm_add_action_or_reset(xe->drm.dev, xe_bo_pinned_fini, xe); +} diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h index 746894798852..0708d50ddfa8 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.h +++ b/drivers/gpu/drm/xe/xe_bo_evict.h @@ -12,4 +12,7 @@ int xe_bo_evict_all(struct xe_device *xe); int xe_bo_restore_kernel(struct xe_device *xe); int xe_bo_restore_user(struct xe_device *xe); +void xe_bo_pci_dev_remove_all(struct xe_device *xe); + +int xe_bo_pinned_init(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1ffb7d1f6be6..d8e227ddf255 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -23,6 +23,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" +#include "xe_bo_evict.h" #include "xe_debugfs.h" #include "xe_devcoredump.h" #include "xe_dma_buf.h" @@ -467,10 +468,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xa_erase(&xe->usm.asid_to_vm, asid); } - spin_lock_init(&xe->pinned.lock); - INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); - INIT_LIST_HEAD(&xe->pinned.external_vram); - INIT_LIST_HEAD(&xe->pinned.evicted); + err = xe_bo_pinned_init(xe); + if (err) + goto err; xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", WQ_MEM_RECLAIM); @@ -939,6 +939,8 @@ void xe_device_remove(struct xe_device *xe) xe_display_unregister(xe); drm_dev_unplug(&xe->drm); + + xe_bo_pci_dev_remove_all(xe); } void xe_device_shutdown(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 4472bf849f2b..0cc14daf75d3 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -426,8 +426,8 @@ struct xe_device { struct list_head kernel_bo_present; /** @pinned.evicted: pinned BO that have been evicted */ struct list_head evicted; - /** @pinned.external_vram: pinned external BO in vram*/ - struct list_head external_vram; + /** @pinned.external: pinned external and dma-buf. */ + struct list_head external; } pinned; /** @ufence_wq: user fence wait queue */ -- 2.51.0 From 4c2007540f2d89a3e708e884dda0df6975fd31bd Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 26 Mar 2025 09:05:50 +0100 Subject: [PATCH 13/16] drm/xe/migrate: Allow xe_migrate_vram() also on non-pagefault capable devices MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The drm_pagemap functionality does not depend on the device having recoverable pagefaults available. So allow xe_migrate_vram() also for such devices. Even if this will have little use in practice, it's beneficial for testin multi-device SVM, since a memory provider could be a non-pagefault capable gpu. 
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250326080551.40201-5-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index d364c9f458e7..f17234533504 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1609,6 +1609,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, { struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool use_usm_batch = xe->info.has_usm; struct dma_fence *fence = NULL; u32 batch_size = 2; u64 src_L0_ofs, dst_L0_ofs; @@ -1625,7 +1626,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, batch_size += pte_update_cmd_size(round_update_size); batch_size += EMIT_COPY_DW; - bb = xe_bb_new(gt, batch_size, true); + bb = xe_bb_new(gt, batch_size, use_usm_batch); if (IS_ERR(bb)) { err = PTR_ERR(bb); return ERR_PTR(err); @@ -1650,7 +1651,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, XE_PAGE_SIZE); job = xe_bb_create_migration_job(m->q, bb, - xe_migrate_batch_base(m, true), + xe_migrate_batch_base(m, use_usm_batch), update_idx); if (IS_ERR(job)) { err = PTR_ERR(job); -- 2.51.0 From b2d4b03b03a781f09148fa255baeccc3c3789ed2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 26 Mar 2025 09:05:51 +0100 Subject: [PATCH 14/16] drm/xe: Make the PT code handle placement per PTE rather than per vma / range MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit With SVM, ranges forwarded to the PT code for binding can, mostly due to races when migrating, point to both VRAM and system / foreign device memory. Make the PT code able to handle that by checking, for each PTE set up, whether it points to local VRAM or to system memory. v2: - Fix system memory GPU atomic access. v3: - Avoid the UAPI change. It needs more thought. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250326080551.40201-6-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 12 +++- drivers/gpu/drm/xe/xe_pt.c | 120 +++++++++++++++++++------------------ 2 files changed, 70 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 9d043d2c30fd..3c7c2353d3c8 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2116,10 +2116,16 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) { struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); - if (res->mem_type == XE_PL_STOLEN) + switch (res->mem_type) { + case XE_PL_STOLEN: return xe_ttm_stolen_gpu_offset(xe); - - return res_to_mem_region(res)->dpa_base; + case XE_PL_TT: + case XE_PL_SYSTEM: + return 0; + default: + return res_to_mem_region(res)->dpa_base; + } + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 9e719535a3bb..82ae159feed1 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -278,13 +278,15 @@ struct xe_pt_stage_bind_walk { struct xe_vm *vm; /** @tile: The tile we're building for. */ struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; + /** @default_pte: PTE flag only template for VRAM. No address is associated */ + u64 default_vram_pte; + /** @default_pte: PTE flag only template for VRAM. 
No address is associated */ + u64 default_system_pte; /** @dma_offset: DMA offset to add to the PTE. */ u64 dma_offset; /** * @needs_64k: This address range enforces 64K alignment and - * granularity. + * granularity on VRAM. */ bool needs_64K; /** @@ -515,13 +517,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { struct xe_res_cursor *curs = xe_walk->curs; bool is_null = xe_vma_is_null(xe_walk->vma); + bool is_vram = is_null ? false : xe_res_is_vram(curs); XE_WARN_ON(xe_walk->va_curs_start != addr); pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : xe_res_dma(curs) + xe_walk->dma_offset, xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; + if (!is_null) + pte |= is_vram ? xe_walk->default_vram_pte : + xe_walk->default_system_pte; /* * Set the XE_PTE_PS64 hint if possible, otherwise if @@ -531,7 +536,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { + } else if (XE_WARN_ON(xe_walk->needs_64K && is_vram)) { return -EINVAL; } } @@ -603,6 +608,44 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { .pt_entry = xe_pt_stage_bind_entry, }; +/* + * Default atomic expectations for different allocation scenarios are as follows: + * + * 1. Traditional API: When the VM is not in LR mode: + * - Device atomics are expected to function with all allocations. + * + * 2. Compute/SVM API: When the VM is in LR mode: + * - Device atomics are the default behavior when the bo is placed in a single region. + * - In all other cases device atomics will be disabled with AE=0 until an application + * request differently using a ioctl like madvise. + */ +static bool xe_atomic_for_vram(struct xe_vm *vm) +{ + return true; +} + +static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo) +{ + struct xe_device *xe = vm->xe; + + if (!xe->info.has_device_atomics_on_smem) + return false; + + /* + * If a SMEM+LMEM allocation is backed by SMEM, a device + * atomics will cause a gpu page fault and which then + * gets migrated to LMEM, bind such allocations with + * device atomics enabled. + * + * TODO: Revisit this. Perhaps add something like a + * fault_on_atomics_in_system UAPI flag. + * Note that this also prohibits GPU atomics in LR mode for + * userptr and system memory on DGFX. + */ + return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) || + (bo && xe_bo_has_single_placement(bo)))); +} + /** * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address * range. @@ -629,9 +672,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, { struct xe_device *xe = tile_to_xe(tile); struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); struct xe_res_cursor curs; + struct xe_vm *vm = xe_vma_vm(vma); struct xe_pt_stage_bind_walk xe_walk = { .base = { .ops = &xe_pt_stage_bind_ops, @@ -639,7 +681,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .max_level = XE_PT_HIGHEST_LEVEL, .staging = true, }, - .vm = xe_vma_vm(vma), + .vm = vm, .tile = tile, .curs = &curs, .va_curs_start = range ? 
range->base.itree.start : @@ -647,26 +689,22 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .vma = vma, .wupd.entries = entries, }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; + struct xe_pt *pt = vm->pt_root[tile->id]; int ret; if (range) { /* Move this entire thing to xe_svm.c? */ - xe_svm_notifier_lock(xe_vma_vm(vma)); + xe_svm_notifier_lock(vm); if (!xe_svm_range_pages_valid(range)) { xe_svm_range_debug(range, "BIND PREPARE - RETRY"); - xe_svm_notifier_unlock(xe_vma_vm(vma)); + xe_svm_notifier_unlock(vm); return -EAGAIN; } if (xe_svm_range_has_dma_mapping(range)) { xe_res_first_dma(range->base.dma_addr, 0, range->base.itree.last + 1 - range->base.itree.start, &curs); - is_devmem = xe_res_is_vram(&curs); - if (is_devmem) - xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM"); - else - xe_svm_range_debug(range, "BIND PREPARE - DMA"); + xe_svm_range_debug(range, "BIND PREPARE - MIXED"); } else { xe_assert(xe, false); } @@ -674,54 +712,18 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, * Note, when unlocking the resource cursor dma addresses may become * stale, but the bind will be aborted anyway at commit time. */ - xe_svm_notifier_unlock(xe_vma_vm(vma)); + xe_svm_notifier_unlock(vm); } - xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ + xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K); if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0; + xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0; + xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ? + XE_USM_PPGTT_PTE_AE : 0; } - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - + xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM; + xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0; if (!range) xe_bo_assert_held(bo); -- 2.51.0 From c045e03634ab677ef9351e9b350da78b5c9d7961 Mon Sep 17 00:00:00 2001 From: Aradhya Bhatia Date: Wed, 26 Mar 2025 20:49:29 +0530 Subject: [PATCH 15/16] drm/xe/migrate: Switch from drm to dev managed actions Change the scope of the migrate subsystem to be dev managed instead of drm managed. 
The parent pci struct &device, that the xe struct &drm_device is a part of, gets removed when a hot unplug is triggered, which causes the underlying iommu group to get destroyed as well. The migrate subsystem, which handles the lifetime of the page-table tree (pt) BO, doesn't get a chance to keep the BO back during the hot unplug, as all the references to DRM haven't been put back. When all the references to DRM are indeed put back later, the migrate subsystem tries to put back the pt BO. Since the underlying iommu group has been already destroyed, a kernel NULL ptr dereference takes place while attempting to keep back the pt BO. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3914 Suggested-by: Thomas Hellstrom Reviewed-by: Tejas Upadhyay Signed-off-by: Aradhya Bhatia Link: https://patchwork.freedesktop.org/patch/msgid/20250326151929.1495972-1-aradhya.bhatia@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_migrate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index f17234533504..f64530a64b01 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -97,7 +97,7 @@ struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile) return tile->migrate->q; } -static void xe_migrate_fini(struct drm_device *dev, void *arg) +static void xe_migrate_fini(void *arg) { struct xe_migrate *m = arg; @@ -401,7 +401,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) struct xe_vm *vm; int err; - m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); + m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -455,7 +455,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) might_lock(&m->job_mutex); fs_reclaim_release(GFP_KERNEL); - err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); + err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m); if (err) return ERR_PTR(err); -- 2.51.0 From b88f48f86500bc0b44b4f73ac66d500a40d320ad Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 26 Mar 2025 16:16:34 +0100 Subject: [PATCH 16/16] drm/xe: Fix an out-of-bounds shift when invalidating TLB MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When the size of the range invalidated is larger than rounddown_pow_of_two(ULONG_MAX), The function macro roundup_pow_of_two(length) will hit an out-of-bounds shift [1]. Use a full TLB invalidation for such cases. v2: - Use a define for the range size limit over which we use a full TLB invalidation. (Lucas) - Use a better calculation of the limit. [1]: [ 39.202421] ------------[ cut here ]------------ [ 39.202657] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ 39.202673] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ 39.202688] CPU: 8 UID: 0 PID: 3129 Comm: xe_exec_system_ Tainted: G U 6.14.0+ #10 [ 39.202690] Tainted: [U]=USER [ 39.202690] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 2001 02/01/2023 [ 39.202691] Call Trace: [ 39.202692] [ 39.202695] dump_stack_lvl+0x6e/0xa0 [ 39.202699] ubsan_epilogue+0x5/0x30 [ 39.202701] __ubsan_handle_shift_out_of_bounds.cold+0x61/0xe6 [ 39.202705] xe_gt_tlb_invalidation_range.cold+0x1d/0x3a [xe] [ 39.202800] ? find_held_lock+0x2b/0x80 [ 39.202803] ? 
mark_held_locks+0x40/0x70 [ 39.202806] xe_svm_invalidate+0x459/0x700 [xe] [ 39.202897] drm_gpusvm_notifier_invalidate+0x4d/0x70 [drm_gpusvm] [ 39.202900] __mmu_notifier_release+0x1f5/0x270 [ 39.202905] exit_mmap+0x40e/0x450 [ 39.202912] __mmput+0x45/0x110 [ 39.202914] exit_mm+0xc5/0x130 [ 39.202916] do_exit+0x21c/0x500 [ 39.202918] ? lockdep_hardirqs_on_prepare+0xdb/0x190 [ 39.202920] do_group_exit+0x36/0xa0 [ 39.202922] get_signal+0x8f8/0x900 [ 39.202926] arch_do_signal_or_restart+0x35/0x100 [ 39.202930] syscall_exit_to_user_mode+0x1fc/0x290 [ 39.202932] do_syscall_64+0xa1/0x180 [ 39.202934] ? do_user_addr_fault+0x59f/0x8a0 [ 39.202937] ? lock_release+0xd2/0x2a0 [ 39.202939] ? do_user_addr_fault+0x5a9/0x8a0 [ 39.202942] ? trace_hardirqs_off+0x4b/0xc0 [ 39.202944] ? clear_bhb_loop+0x25/0x80 [ 39.202946] ? clear_bhb_loop+0x25/0x80 [ 39.202947] ? clear_bhb_loop+0x25/0x80 [ 39.202950] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 39.202952] RIP: 0033:0x7fa945e543e1 [ 39.202961] Code: Unable to access opcode bytes at 0x7fa945e543b7. [ 39.202962] RSP: 002b:00007ffca8fb4170 EFLAGS: 00000293 [ 39.202963] RAX: 000000000000003d RBX: 0000000000000000 RCX: 00007fa945e543e3 [ 39.202964] RDX: 0000000000000000 RSI: 00007ffca8fb41ac RDI: 00000000ffffffff [ 39.202964] RBP: 00007ffca8fb4190 R08: 0000000000000000 R09: 00007fa945f600a0 [ 39.202965] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000000 [ 39.202966] R13: 00007fa9460dd310 R14: 00007ffca8fb41ac R15: 0000000000000000 [ 39.202970] [ 39.202970] ---[ end trace ]--- Fixes: 332dd0116c82 ("drm/xe: Add range based TLB invalidations") Cc: Matthew Brost Cc: Rodrigo Vivi Cc: # v6.8+ Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi #v1 Link: https://lore.kernel.org/r/20250326151634.36916-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 03072e094991..084cbdeba8ea 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -322,6 +322,13 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +/* + * Ensure that roundup_pow_of_two(length) doesn't overflow. + * Note that roundup_pow_of_two() operates on unsigned long, + * not on u64. + */ +#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) + /** * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an * address range @@ -346,6 +353,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; + u64 length = end - start; int len = 0; xe_gt_assert(gt, fence); @@ -358,11 +366,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation) { + if (!xe->info.has_range_tlb_invalidation || + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { u64 orig_start = start; - u64 length = end - start; u64 align; if (length < SZ_4K) -- 2.51.0
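As a back-of-the-envelope illustration of the arithmetic the last patch guards against, the following standalone userspace C sketch (not kernel code; it assumes a 64-bit unsigned long, and the macro and helper names are local stand-ins rather than the kernel's) shows why an invalidation length above rounddown_pow_of_two(ULONG_MAX) has to take the full-invalidation path instead of being fed to roundup_pow_of_two():

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the patch's limit: rounddown_pow_of_two(ULONG_MAX) == 1UL << 63 on 64-bit. */
#define MAX_RANGE_TLB_INVALIDATION_LENGTH (ULONG_MAX - (ULONG_MAX >> 1))

/*
 * In the kernel, roundup_pow_of_two(x) expands to 1UL << fls_long(x - 1);
 * for any x above the limit the shift count reaches 64, which is exactly
 * the shift-out-of-bounds UBSAN reports in include/linux/log2.h.
 */
static bool needs_full_invalidation(unsigned long start, unsigned long end,
				    bool has_range_tlb_invalidation)
{
	unsigned long length = end - start;

	/* Same condition the patch adds in xe_gt_tlb_invalidation_range(). */
	return !has_range_tlb_invalidation ||
	       length > MAX_RANGE_TLB_INVALIDATION_LENGTH;
}

int main(void)
{
	/* A range spanning almost the whole address space trips the guard... */
	printf("%d\n", needs_full_invalidation(0, ULONG_MAX, true));  /* prints 1 */
	/* ...while an ordinary 1 MiB range still uses a range invalidation. */
	printf("%d\n", needs_full_invalidation(0, 1UL << 20, true));  /* prints 0 */
	return 0;
}

With that check in place, lengths at or below 1UL << 63 still go through the power-of-two rounding safely, while anything larger, such as the near whole-address-space invalidation seen in the splat above, falls back to a full TLB invalidation.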