From 70b8e6e3226fe2ae514a324ff0c64f074f6f419b Mon Sep 17 00:00:00 2001
From: Marcin Bernatowicz <marcin.bernatowicz@linux.intel.com>
Date: Wed, 11 Dec 2024 07:42:11 +0100
Subject: [PATCH 01/16] drm/xe/vf: Don't apply Wa_22019338487 for VF

Don't use Wa_22019338487 for VF.

Signed-off-by: Marcin Bernatowicz <marcin.bernatowicz@linux.intel.com>
Cc: Adam Miszczak <adam.miszczak@linux.intel.com>
Cc: Jakub Kolakowski <jakub1.kolakowski@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Lukasz Laguna <lukasz.laguna@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Narasimha C V <narasimha.c.v@intel.com>
Cc: Piotr Piorkowski <piotr.piorkowski@intel.com>
Cc: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241211064211.781820-4-marcin.bernatowicz@linux.intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/xe/xe_wa_oob.rules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 3ed12a85cc60..40438c3d9b72 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -34,7 +34,7 @@
 		GRAPHICS_VERSION(2004)
 22019338487	MEDIA_VERSION(2000)
 		GRAPHICS_VERSION(2001)
-		MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
+		MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)
 22019338487_display	PLATFORM(LUNARLAKE)
 16023588340	GRAPHICS_VERSION(2001)
 14019789679	GRAPHICS_VERSION(1255)
-- 
2.51.0


From c6aac2fa77a3221f2ed0484bf019030f0749d863 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 20 Dec 2024 10:29:36 -0500
Subject: [PATCH 02/16] drm/xe: Introduce the RPa information

RPa is the Achievable frequency, defined by PCODE at runtime
based on multiple running conditions.

v2: Remove RPA_MASK from i915 file

Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241220152936.623627-1-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h    |  4 +++
 drivers/gpu/drm/xe/xe_gt_freq.c      | 15 ++++++++
 drivers/gpu/drm/xe/xe_guc_pc.c       | 53 ++++++++++++++++++++++++++--
 drivers/gpu/drm/xe/xe_guc_pc.h       |  1 +
 drivers/gpu/drm/xe/xe_guc_pc_types.h |  2 ++
 5 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 3293172b0128..6cf282618836 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -44,12 +44,16 @@
 
 #define MTL_RP_STATE_CAP			XE_REG(0x138000)
 
+#define MTL_GT_RPA_FREQUENCY			XE_REG(0x138008)
 #define MTL_GT_RPE_FREQUENCY			XE_REG(0x13800c)
 
 #define MTL_MEDIAP_STATE_CAP			XE_REG(0x138020)
 #define   MTL_RPN_CAP_MASK			REG_GENMASK(24, 16)
 #define   MTL_RP0_CAP_MASK			REG_GENMASK(8, 0)
 
+#define MTL_MPA_FREQUENCY			XE_REG(0x138028)
+#define   MTL_RPA_MASK				REG_GENMASK(8, 0)
+
 #define MTL_MPE_FREQUENCY			XE_REG(0x13802c)
 #define   MTL_RPE_MASK				REG_GENMASK(8, 0)
 
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 6bd39b2c5003..604bdc7c8173 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -115,6 +115,20 @@ static ssize_t rpe_freq_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(rpe_freq);
 
+static ssize_t rpa_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+
+	xe_pm_runtime_get(dev_to_xe(dev));
+	freq = xe_guc_pc_get_rpa_freq(pc);
+	xe_pm_runtime_put(dev_to_xe(dev));
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+static DEVICE_ATTR_RO(rpa_freq);
+
 static ssize_t rpn_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
@@ -202,6 +216,7 @@ static const struct attribute *freq_attrs[] = {
 	&dev_attr_act_freq.attr,
 	&dev_attr_cur_freq.attr,
 	&dev_attr_rp0_freq.attr,
+	&dev_attr_rpa_freq.attr,
 	&dev_attr_rpe_freq.attr,
 	&dev_attr_rpn_freq.attr,
 	&dev_attr_min_freq.attr,
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index e8b9faeaef64..a6fdd2c062ff 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -38,6 +38,7 @@
 
 #define FREQ_INFO_REC	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
 #define   RPE_MASK		REG_GENMASK(15, 8)
+#define   RPA_MASK		REG_GENMASK(31, 16)
 
 #define GT_PERF_STATUS		XE_REG(0x1381b4)
 #define   CAGF_MASK	REG_GENMASK(19, 11)
@@ -328,6 +329,19 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
 				   freq);
 }
 
+static void mtl_update_rpa_value(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(&gt->mmio, MTL_MPA_FREQUENCY);
+	else
+		reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY);
+
+	pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg));
+}
+
 static void mtl_update_rpe_value(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
@@ -341,6 +355,25 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc)
 	pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
 }
 
+static void tgl_update_rpa_value(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg;
+
+	/*
+	 * For PVC we still need to use fused RP1 as the approximation for RPe
+	 * For other platforms than PVC we get the resolved RPe directly from
+	 * PCODE at a different register
+	 */
+	if (xe->info.platform == XE_PVC)
+		reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
+	else
+		reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
+
+	pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
 static void tgl_update_rpe_value(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
@@ -365,10 +398,13 @@ static void pc_update_rp_values(struct xe_guc_pc *pc)
 	struct xe_gt *gt = pc_to_gt(pc);
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (GRAPHICS_VERx100(xe) >= 1270)
+	if (GRAPHICS_VERx100(xe) >= 1270) {
+		mtl_update_rpa_value(pc);
 		mtl_update_rpe_value(pc);
-	else
+	} else {
+		tgl_update_rpa_value(pc);
 		tgl_update_rpe_value(pc);
+	}
 
 	/*
 	 * RPe is decided at runtime by PCODE. In the rare case where that's
@@ -447,6 +483,19 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
 	return pc->rp0_freq;
 }
 
+/**
+ * xe_guc_pc_get_rpa_freq - Get the RPa freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPa freq.
+ */
+u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc)
+{
+	pc_update_rp_values(pc);
+
+	return pc->rpa_freq;
+}
+
 /**
  * xe_guc_pc_get_rpe_freq - Get the RPe freq
  * @pc: The GuC PC
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index efda432fadfc..619f59cd633c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -21,6 +21,7 @@ int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc);
 u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
 int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
 u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc);
 u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc);
 u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc);
 int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index 13810be015db..2978ac9a249b 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -17,6 +17,8 @@ struct xe_guc_pc {
 	struct xe_bo *bo;
 	/** @rp0_freq: HW RP0 frequency - The Maximum one */
 	u32 rp0_freq;
+	/** @rpa_freq: HW RPa frequency - The Achievable one */
+	u32 rpa_freq;
 	/** @rpe_freq: HW RPe frequency - The Efficient one */
 	u32 rpe_freq;
 	/** @rpn_freq: HW RPN frequency - The Minimum one */
-- 
2.51.0


From 3259ff4eff330f8451e8f569951752f5aea38405 Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Mon, 16 Dec 2024 11:41:19 -0800
Subject: [PATCH 03/16] drm/xe/slpc: Remove unnecessary force wakes

FORCEWAKE_ALL is not needed when we are trying to read the cur_freq,
we just need to wake up the GT domain. We also do not need a force wake
to for a H2G query to obtain min frequency.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241216194119.3017792-1-vinay.belgaumkar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index a6fdd2c062ff..df7f130fb663 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -457,8 +457,8 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
 	 * GuC SLPC plays with cur freq request when GuCRC is enabled
 	 * Block RC6 for a more reliable read.
 	 */
-	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
 		xe_force_wake_put(gt_to_fw(gt), fw_ref);
 		return -ETIMEDOUT;
 	}
@@ -530,10 +530,10 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
  */
 int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
 {
-	struct xe_gt *gt = pc_to_gt(pc);
-	unsigned int fw_ref;
 	int ret;
 
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
 	mutex_lock(&pc->freq_lock);
 	if (!pc->freq_ready) {
 		/* Might be in the middle of a gt reset */
@@ -541,24 +541,12 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
 		goto out;
 	}
 
-	/*
-	 * GuC SLPC plays with min freq request when GuCRC is enabled
-	 * Block RC6 for a more reliable read.
-	 */
-	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
-		ret = -ETIMEDOUT;
-		goto fw;
-	}
-
 	ret = pc_action_query_task_state(pc);
 	if (ret)
-		goto fw;
+		goto out;
 
 	*freq = pc_get_min_freq(pc);
 
-fw:
-	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 out:
 	mutex_unlock(&pc->freq_lock);
 	return ret;
@@ -1018,8 +1006,8 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 
 	xe_gt_assert(gt, xe_device_uc_enabled(xe));
 
-	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
 		xe_force_wake_put(gt_to_fw(gt), fw_ref);
 		return -ETIMEDOUT;
 	}
-- 
2.51.0


From 75fd04f276de31cc59419fda169232d097fbf291 Mon Sep 17 00:00:00 2001
From: Nitin Gote <nitin.r.gote@intel.com>
Date: Mon, 6 Jan 2025 15:56:46 +0530
Subject: [PATCH 04/16] drm/xe: Fix all typos in xe

Fix all typos in files of xe, reported by codespell tool.

Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Stuart Summers <stuart.summers@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250106102646.1400146-2-nitin.r.gote@intel.com
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/xe/Kconfig.debug           | 4 ++--
 drivers/gpu/drm/xe/abi/guc_capture_abi.h   | 2 +-
 drivers/gpu/drm/xe/abi/guc_klvs_abi.h      | 6 +++---
 drivers/gpu/drm/xe/regs/xe_reg_defs.h      | 2 +-
 drivers/gpu/drm/xe/tests/xe_mocs.c         | 2 +-
 drivers/gpu/drm/xe/xe_bb.c                 | 2 +-
 drivers/gpu/drm/xe/xe_bo.c                 | 8 ++++----
 drivers/gpu/drm/xe/xe_bo_doc.h             | 2 +-
 drivers/gpu/drm/xe/xe_devcoredump.c        | 2 +-
 drivers/gpu/drm/xe/xe_device.c             | 2 +-
 drivers/gpu/drm/xe/xe_drm_client.c         | 2 +-
 drivers/gpu/drm/xe/xe_exec.c               | 2 +-
 drivers/gpu/drm/xe/xe_ggtt.c               | 2 +-
 drivers/gpu/drm/xe/xe_gt.h                 | 2 +-
 drivers/gpu/drm/xe/xe_gt_mcr.c             | 2 +-
 drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 2 +-
 drivers/gpu/drm/xe/xe_guc_capture.c        | 2 +-
 drivers/gpu/drm/xe/xe_guc_capture_types.h  | 4 ++--
 drivers/gpu/drm/xe/xe_guc_ct.c             | 4 ++--
 drivers/gpu/drm/xe/xe_guc_submit.c         | 2 +-
 drivers/gpu/drm/xe/xe_hmm.c                | 2 +-
 drivers/gpu/drm/xe/xe_migrate.c            | 2 +-
 drivers/gpu/drm/xe/xe_pci.c                | 4 ++--
 drivers/gpu/drm/xe/xe_pcode.c              | 2 +-
 drivers/gpu/drm/xe/xe_pm.c                 | 2 +-
 drivers/gpu/drm/xe/xe_pt.c                 | 2 +-
 drivers/gpu/drm/xe/xe_rtp.h                | 4 ++--
 drivers/gpu/drm/xe/xe_uc_fw_types.h        | 2 +-
 drivers/gpu/drm/xe/xe_vm.c                 | 4 ++--
 29 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 2de0de41b8dd..0d749ed44878 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -66,7 +66,7 @@ config DRM_XE_DEBUG_MEM
 	bool "Enable passing SYS/VRAM addresses to user space"
 	default n
 	help
-	  Pass object location trough uapi. Intended for extended
+	  Pass object location through uapi. Intended for extended
 	  testing and development only.
 
 	  Recommended for driver developers only.
@@ -104,5 +104,5 @@ config DRM_XE_USERPTR_INVAL_INJECT
          Choose this option when debugging error paths that
 	 are hit during checks for userptr invalidations.
 
-	 Recomended for driver developers only.
+	 Recommended for driver developers only.
 	 If in doubt, say "N".
diff --git a/drivers/gpu/drm/xe/abi/guc_capture_abi.h b/drivers/gpu/drm/xe/abi/guc_capture_abi.h
index e7898edc6236..dd4117553739 100644
--- a/drivers/gpu/drm/xe/abi/guc_capture_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_capture_abi.h
@@ -25,7 +25,7 @@ enum guc_state_capture_type {
 
 #define GUC_STATE_CAPTURE_TYPE_MAX	(GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1)
 
-/* Class indecies for capture_class and capture_instance arrays */
+/* Class indices for capture_class and capture_instance arrays */
 enum guc_capture_list_class_type {
 	GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
 	GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 7dcb118e3d9f..d633f1c739e4 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -132,7 +132,7 @@ enum  {
  * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
  *      This config sets whether strict scheduling is enabled whereby any VF
  *      that doesnât have work to submit is still allocated a fixed execution
- *      time-slice to ensure active VFs execution is always consitent even
+ *      time-slice to ensure active VFs execution is always consistent even
  *      during other VF reprovisiong / rebooting events. Changing this KLV
  *      impacts all VFs and takes effect on the next VF-Switch event.
  *
@@ -207,7 +207,7 @@ enum  {
  *      of and this will never be perfectly-exact (accumulated nano-second
  *      granularity) since the GPUs clock time runs off a different crystal
  *      from the CPUs clock. Changing this KLV on a VF that is currently
- *      running a context wont take effect until a new context is scheduled in.
+ *      running a context won't take effect until a new context is scheduled in.
  *      That said, when the PF is changing this value from 0x0 to
  *      a non-zero value, it might never take effect if the VF is running an
  *      infinitely long compute or shader kernel. In such a scenario, the
@@ -227,7 +227,7 @@ enum  {
  *      HW is capable and this will never be perfectly-exact (accumulated
  *      nano-second granularity) since the GPUs clock time runs off a
  *      different crystal from the CPUs clock. Changing this KLV on a VF
- *      that is currently running a context wont take effect until a new
+ *      that is currently running a context won't take effect until a new
  *      context is scheduled in.
  *      That said, when the PF is changing this value from 0x0 to
  *      a non-zero value, it might never take effect if the VF is running an
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 51fd40ffafcb..0eedd6c26b1b 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -13,7 +13,7 @@
 /**
  * struct xe_reg - Register definition
  *
- * Register defintion to be used by the individual register. Although the same
+ * Register definition to be used by the individual register. Although the same
  * definition is used for xe_reg and xe_reg_mcr, they use different internal
  * APIs for accesses.
  */
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 6f9b7a266b41..d3f71d13eb81 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -58,7 +58,7 @@ static void read_l3cc_table(struct xe_gt *gt,
 
 			mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
 		} else {
-			/* Just re-use value read on previous iteration */
+			/* Just reuse value read on previous iteration */
 			reg_val >>= 16;
 		}
 
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index ef777dbdf4ec..9570672fce33 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -41,7 +41,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 	/*
 	 * We need to allocate space for the requested number of dwords,
 	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
-	 * space to accomodate the platform-specific hardware prefetch
+	 * space to accommodate the platform-specific hardware prefetch
 	 * requirements.
 	 */
 	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e6c896ad5602..3f5391d416d4 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -786,7 +786,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		 * / resume, some of the pinned memory is required for the
 		 * device to resume / use the GPU to move other evicted memory
 		 * (user memory) around. This likely could be optimized a bit
-		 * futher where we find the minimum set of pinned memory
+		 * further where we find the minimum set of pinned memory
 		 * required for resume but for simplity doing a memcpy for all
 		 * pinned memory.
 		 */
@@ -875,7 +875,7 @@ out:
  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
  * @bo: The buffer object to move.
  *
- * On successful completion, the object memory will be moved to sytem memory.
+ * On successful completion, the object memory will be moved to system memory.
  *
  * This is needed to for special handling of pinned VRAM object during
  * suspend-resume.
@@ -1370,7 +1370,7 @@ static const struct drm_gem_object_funcs xe_gem_object_funcs = {
 /**
  * xe_bo_alloc - Allocate storage for a struct xe_bo
  *
- * This funcition is intended to allocate storage to be used for input
+ * This function is intended to allocate storage to be used for input
  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
  * created is needed before the call to __xe_bo_create_locked().
  * If __xe_bo_create_locked ends up never to be called, then the
@@ -2412,7 +2412,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
  * @force_alloc: Set force_alloc in ttm_operation_ctx
  *
  * On successful completion, the object memory will be moved to evict
- * placement. Ths function blocks until the object has been fully moved.
+ * placement. This function blocks until the object has been fully moved.
  *
  * Return: 0 on success. Negative error code on failure.
  */
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
index f57d440cc95a..25a884c64bf1 100644
--- a/drivers/gpu/drm/xe/xe_bo_doc.h
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -41,7 +41,7 @@
  * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
  * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
  * user BOs are evictable and user BOs are never pinned by XE. The allocation of
- * the backing store can be defered from creation time until first use which is
+ * the backing store can be deferred from creation time until first use which is
  * either mmap, bind, or pagefault.
  *
  * Private BOs
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 6980304c8903..81dc7795c065 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -48,7 +48,7 @@
  *
  * **Coredump release**:
  *   After a coredump is generated, it stays in kernel memory until released by
- *   userpace by writing anything to it, or after an internal timer expires. The
+ *   userspace by writing anything to it, or after an internal timer expires. The
  *   exact timeout may vary and should not be relied upon. Example to release
  *   a coredump:
  *
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index bb627247c9c8..5cbc96b214fe 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -1000,7 +1000,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
  * xe_device_declare_wedged - Declare device wedged
  * @xe: xe device instance
  *
- * This is a final state that can only be cleared with a mudule
+ * This is a final state that can only be cleared with a module
  * re-probe (unbind + bind).
  * In this state every IOCTL will be blocked so the GT cannot be used.
  * In general it will be called upon any critical error such as gt reset
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 298a587da7f1..a1710591a8d6 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -384,7 +384,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
  * @p: The drm_printer ptr
  * @file: The drm_file ptr
  *
- * This is callabck for drm fdinfo interface. Register this callback
+ * This is callback for drm fdinfo interface. Register this callback
  * in drm driver ops for show_fdinfo.
  *
  * Return: void
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 31cca938956f..df8ce550deb4 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -33,7 +33,7 @@
  *
  * In XE we avoid all of this complication by not allowing a BO list to be
  * passed into an exec, using the dma-buf implicit sync uAPI, have binds as
- * seperate operations, and using the DRM scheduler to flow control the ring.
+ * separate operations, and using the DRM scheduler to flow control the ring.
  * Let's deep dive on each of these.
  *
  * We can get away from a BO list by forcing the user to use in / out fences on
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 05154f9de1a6..5fcb2b4c2c13 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -362,7 +362,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
 
 	/*
 	 * So we don't need to worry about 64K GGTT layout when dealing with
-	 * scratch entires, rather keep the scratch page in system memory on
+	 * scratch entries, rather keep the scratch page in system memory on
 	 * platforms where 64K pages are needed for VRAM.
 	 */
 	flags = XE_BO_FLAG_PINNED;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 82b9b7f82fca..4e4e8e103419 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -37,7 +37,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt);
 
 /**
  * xe_gt_record_user_engines - save data related to engines available to
- * usersapce
+ * userspace
  * @gt: GT structure
  *
  * Walk the available HW engines from gt->info.engine_mask and calculate data
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 5013d674e17d..71485b96fc6f 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -371,7 +371,7 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
  * @group: steering group ID
  * @instance: steering instance ID
  *
- * Return: the coverted DSS id.
+ * Return: the converted DSS id.
  */
 u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance)
 {
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index bd621df3ab91..878e96281c03 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -2161,7 +2161,7 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid)
  *
  * This function can only be called on PF.
  *
- * Return: mininum size of the buffer or the number of bytes saved,
+ * Return: minimum size of the buffer or the number of bytes saved,
  *         or a negative error code on failure.
  */
 ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size)
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index 137571fae4ed..f6d523e4c5fe 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -1955,7 +1955,7 @@ xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q)
 }
 
 /*
- * xe_guc_capture_put_matched_nodes - Cleanup macthed nodes
+ * xe_guc_capture_put_matched_nodes - Cleanup matched nodes
  * @guc: The GuC object
  *
  * Free matched node and all nodes with the equal guc_id from
diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h
index 2057125b1bfa..ca2d390ccbee 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h
@@ -22,7 +22,7 @@ enum capture_register_data_type {
  * struct __guc_mmio_reg_descr - GuC mmio register descriptor
  *
  * xe_guc_capture module uses these structures to define a register
- * (offsets, names, flags,...) that are used at the ADS regisration
+ * (offsets, names, flags,...) that are used at the ADS registration
  * time as well as during runtime processing and reporting of error-
  * capture states generated by GuC just prior to engine reset events.
  */
@@ -48,7 +48,7 @@ struct __guc_mmio_reg_descr {
  *
  * xe_guc_capture module uses these structures to maintain static
  * tables (per unique platform) that consists of lists of registers
- * (offsets, names, flags,...) that are used at the ADS regisration
+ * (offsets, names, flags,...) that are used at the ADS registration
  * time as well as during runtime processing and reporting of error-
  * capture states generated by GuC just prior to engine reset events.
  */
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 7d33f3a11e61..8b65c5e959cc 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -710,7 +710,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	--len;
 	++action;
 
-	/* Write H2G ensuring visable before descriptor update */
+	/* Write H2G ensuring visible before descriptor update */
 	xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
 	xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
 	xe_device_wmb(xe);
@@ -1383,7 +1383,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 		 * this function and nowhere else. Hence, they cannot be different
 		 * unless two g2h_read calls are running concurrently. Which is not
 		 * possible because it is guarded by ct->fast_lock. And yet, some
-		 * discrete platforms are reguarly hitting this error :(.
+		 * discrete platforms are regularly hitting this error :(.
 		 *
 		 * desc_head rolling backwards shouldn't cause any noticeable
 		 * problems - just a delay in GuC being allowed to proceed past that
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 9c36329fe857..913c74d6e2ae 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1226,7 +1226,7 @@ sched_enable:
 	enable_scheduling(q);
 rearm:
 	/*
-	 * XXX: Ideally want to adjust timeout based on current exection time
+	 * XXX: Ideally want to adjust timeout based on current execution time
 	 * but there is not currently an easy way to do in DRM scheduler. With
 	 * some thought, do this in a follow up.
 	 */
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index 2c32dc46f7d4..089834467880 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -159,7 +159,7 @@ void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
  * This function allocates the storage of the userptr sg table.
  * It is caller's responsibility to free it calling sg_free_table.
  *
- * returns: 0 for succuss; negative error no on failure
+ * returns: 0 for success; negative error no on failure
  */
 int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
 				  bool is_mm_mmap_locked)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 1b97d90aadda..278bc96cf593 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1506,7 +1506,7 @@ err_bb:
  * using the default engine for the updates, they will be performed in the
  * order they grab the job_mutex. If different engines are used, external
  * synchronization is needed for overlapping updates to maintain page-table
- * consistency. Note that the meaing of "overlapping" is that the updates
+ * consistency. Note that the meaning of "overlapping" is that the updates
  * touch the same page-table, which might be a higher-level page-directory.
  * If no pipelining is needed, then updates may be performed by the cpu.
  *
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6b7f77425c7f..39be74848e44 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -490,7 +490,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
 		 * least basic xe_gt and xe_guc initialization.
 		 *
 		 * Since to obtain the value of GMDID_MEDIA we need to use the
-		 * media GuC, temporarly tweak the gt type.
+		 * media GuC, temporarily tweak the gt type.
 		 */
 		xe_gt_assert(gt, gt->info.type == XE_GT_TYPE_UNINITIALIZED);
 
@@ -781,7 +781,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
  * error injectable functions is proper handling of the error code by the
  * caller for recovery, which is always the case here. The second
  * requirement is that no state is changed before the first error return.
- * It is not strictly fullfilled for all initialization functions using the
+ * It is not strictly fulfilled for all initialization functions using the
  * ALLOW_ERROR_INJECTION() macro but this is acceptable because for those
  * error cases at probe time, the error code is simply propagated up by the
  * caller. Therefore there is no consequence on those specific callers when
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index d95d9835de42..9333ce776a6e 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -217,7 +217,7 @@ out:
  *
  * It returns 0 on success, and -ERROR number on failure, -EINVAL if max
  * frequency is higher then the minimal, and other errors directly translated
- * from the PCODE Error returs:
+ * from the PCODE Error returns:
  * - -ENXIO: "Illegal Command"
  * - -ETIMEDOUT: "Timed out"
  * - -EINVAL: "Illegal Data"
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 85cfec5ec9af..e434b0b1870b 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -391,7 +391,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 
 	/*
 	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
-	 * also checks and delets bo entry from user fault list.
+	 * also checks and deletes bo entry from user fault list.
 	 */
 	mutex_lock(&xe->mem_access.vram_userfault.lock);
 	list_for_each_entry_safe(bo, on,
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 65c3c1688710..1ddcc7e79a93 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -276,7 +276,7 @@ struct xe_pt_stage_bind_walk {
 	/* Also input, but is updated during the walk*/
 	/** @curs: The DMA address cursor. */
 	struct xe_res_cursor *curs;
-	/** @va_curs_start: The Virtual address coresponding to @curs->start */
+	/** @va_curs_start: The Virtual address corresponding to @curs->start */
 	u64 va_curs_start;
 
 	/* Output */
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 7dea10d732d7..38b9f13bba5e 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -131,7 +131,7 @@ struct xe_reg_sr;
  * @ver_end__: Last graphics IP version to match
  *
  * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
- * inclusive on boths sides
+ * inclusive on both sides
  *
  * Refer to XE_RTP_RULES() for expected usage.
  */
@@ -169,7 +169,7 @@ struct xe_reg_sr;
  * @ver_end__: Last media IP version to match
  *
  * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
- * inclusive on boths sides
+ * inclusive on both sides
  *
  * Refer to XE_RTP_RULES() for expected usage.
  */
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 0d8caa0e7354..ad3b35a0e6eb 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -92,7 +92,7 @@ struct xe_uc_fw {
 		const enum xe_uc_fw_status status;
 		/**
 		 * @__status: private firmware load status - only to be used
-		 * by firmware laoding code
+		 * by firmware loading code
 		 */
 		enum xe_uc_fw_status __status;
 	};
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 74d684708b00..e86c4cc47884 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1025,7 +1025,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
 
 		/*
 		 * Since userptr pages are not pinned, we can't remove
-		 * the notifer until we're sure the GPU is not accessing
+		 * the notifier until we're sure the GPU is not accessing
 		 * them anymore
 		 */
 		mmu_interval_notifier_remove(&userptr->notifier);
@@ -2108,7 +2108,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 			}
 		}
 
-		/* Adjust for partial unbind after removin VMA from VM */
+		/* Adjust for partial unbind after removing VMA from VM */
 		if (!err) {
 			op->base.remap.unmap->va->va.addr = op->remap.start;
 			op->base.remap.unmap->va->va.range = op->remap.range;
-- 
2.51.0


From ee5a1321df90891d59d83b7c9d5b6c5b755d059d Mon Sep 17 00:00:00 2001
From: Jesus Narvaez <jesus.narvaez@intel.com>
Date: Thu, 12 Dec 2024 11:01:00 -0800
Subject: [PATCH 05/16] drm/xe/guc: Adding steering info support for GuC
 register lists

The guc_mmio_reg interface supports steering, but it is currently not
implemented. This will allow the GuC to control steering of MMIO
registers after save-restore and avoid reading from fused off MCR
register instances.

Fixes: 9c57bc08652a ("drm/xe/lnl: Drop force_probe requirement")
Signed-off-by: Jesus Narvaez <jesus.narvaez@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241212190100.3768068-1-jesus.narvaez@intel.com
---
 drivers/gpu/drm/xe/xe_gt_mcr.c  |  6 +++---
 drivers/gpu/drm/xe/xe_gt_mcr.h  |  4 ++++
 drivers/gpu/drm/xe/xe_guc_ads.c | 15 +++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 71485b96fc6f..a1676b787fdc 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -550,9 +550,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
  * Returns true if the caller should steer to the @group/@instance values
  * returned.  Returns false if the caller need not perform any steering
  */
-static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
-						 struct xe_reg_mcr reg_mcr,
-						 u8 *group, u8 *instance)
+bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+					  struct xe_reg_mcr reg_mcr,
+					  u8 *group, u8 *instance)
 {
 	const struct xe_reg reg = to_xe_reg(reg_mcr);
 	const struct xe_mmio_range *implicit_ranges;
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index c0cd36021c24..bc06520befab 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -26,6 +26,10 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
 void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
 			       u32 value);
 
+bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+					  struct xe_reg_mcr reg_mcr,
+					  u8 *group, u8 *instance);
+
 void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
 void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
 u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 943146e5b460..fab259adc380 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -29,6 +29,7 @@
 #include "xe_platform_types.h"
 #include "xe_uc_fw.h"
 #include "xe_wa.h"
+#include "xe_gt_mcr.h"
 
 /* Slack of a few additional entries per engine */
 #define ADS_REGSET_EXTRA_MAX	8
@@ -696,6 +697,20 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
 		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
 	};
 
+	if (reg.mcr) {
+		struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr);
+		u8 group, instance;
+
+		bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg,
+								  &group, &instance);
+
+		if (steer) {
+			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group);
+			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance);
+			entry.flags |= GUC_REGSET_STEERING_NEEDED;
+		}
+	}
+
 	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
 			 &entry, sizeof(entry));
 }
-- 
2.51.0


From 6a04bb5a2046067681257d5dd69a724856c8fbcb Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 9 Jan 2025 12:50:32 +0200
Subject: [PATCH 06/16] drm/xe: remove unused xe_pciids.h harder, add missing
 PCI ID

Commit 493454445c95 ("drm/xe: switch to common PCI ID macros") removed
xe_pciids.h via drm-intel-next. In the mean time, commit ae78ec0a52c4
("drm/xe/ptl: Add another PTL PCI ID") added to xe_pciids.h via
drm-xe-next.

The two commits were merged in commit 8f109f287fdc ("Merge drm/drm-next
into drm-xe-next"), but xe_pciids.h wasn't removed, and the PCI ID
wasn't added to pciids.h.

Remove xe_pciids.h, and add the PCI ID to pciids.h.

Cc: Matt Atwood <matthew.s.atwood@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Fixes: 8f109f287fdc ("Merge drm/drm-next into drm-xe-next")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/r/20241125120921.1bbc1930@canb.auug.org.au
Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250109105032.2585416-1-jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/intel/pciids.h    |   3 +-
 include/drm/intel/xe_pciids.h | 235 ----------------------------------
 2 files changed, 2 insertions(+), 236 deletions(-)
 delete mode 100644 include/drm/intel/xe_pciids.h

diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h
index 32480b5563db..7883384acd5e 100644
--- a/include/drm/intel/pciids.h
+++ b/include/drm/intel/pciids.h
@@ -829,6 +829,7 @@
 	MACRO__(0xB092, ## __VA_ARGS__), \
 	MACRO__(0xB0A0, ## __VA_ARGS__), \
 	MACRO__(0xB0A1, ## __VA_ARGS__), \
-	MACRO__(0xB0A2, ## __VA_ARGS__)
+	MACRO__(0xB0A2, ## __VA_ARGS__), \
+	MACRO__(0xB0B0, ## __VA_ARGS__)
 
 #endif /* __PCIIDS_H__ */
diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h
deleted file mode 100644
index 16d4b8bb590a..000000000000
--- a/include/drm/intel/xe_pciids.h
+++ /dev/null
@@ -1,235 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright Â© 2022 Intel Corporation
- */
-
-#ifndef _XE_PCIIDS_H_
-#define _XE_PCIIDS_H_
-
-/*
- * Lists below can be turned into initializers for a struct pci_device_id
- * by defining INTEL_VGA_DEVICE:
- *
- * #define INTEL_VGA_DEVICE(id, info) { \
- *	0x8086, id,			\
- *	~0, ~0,				\
- *	0x030000, 0xff0000,		\
- *	(unsigned long) info }
- *
- * And then calling like:
- *
- * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__)
- *
- * To turn them into something else, just provide a different macro passed as
- * first argument.
- */
-
-/* TGL */
-#define XE_TGL_GT1_IDS(MACRO__, ...)		\
-	MACRO__(0x9A60, ## __VA_ARGS__),	\
-	MACRO__(0x9A68, ## __VA_ARGS__),	\
-	MACRO__(0x9A70, ## __VA_ARGS__)
-
-#define XE_TGL_GT2_IDS(MACRO__, ...)		\
-	MACRO__(0x9A40, ## __VA_ARGS__),	\
-	MACRO__(0x9A49, ## __VA_ARGS__),	\
-	MACRO__(0x9A59, ## __VA_ARGS__),	\
-	MACRO__(0x9A78, ## __VA_ARGS__),	\
-	MACRO__(0x9AC0, ## __VA_ARGS__),	\
-	MACRO__(0x9AC9, ## __VA_ARGS__),	\
-	MACRO__(0x9AD9, ## __VA_ARGS__),	\
-	MACRO__(0x9AF8, ## __VA_ARGS__)
-
-#define XE_TGL_IDS(MACRO__, ...)		\
-	XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\
-	XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__)
-
-/* RKL */
-#define XE_RKL_IDS(MACRO__, ...)		\
-	MACRO__(0x4C80, ## __VA_ARGS__),	\
-	MACRO__(0x4C8A, ## __VA_ARGS__),	\
-	MACRO__(0x4C8B, ## __VA_ARGS__),	\
-	MACRO__(0x4C8C, ## __VA_ARGS__),	\
-	MACRO__(0x4C90, ## __VA_ARGS__),	\
-	MACRO__(0x4C9A, ## __VA_ARGS__)
-
-/* DG1 */
-#define XE_DG1_IDS(MACRO__, ...)		\
-	MACRO__(0x4905, ## __VA_ARGS__),	\
-	MACRO__(0x4906, ## __VA_ARGS__),	\
-	MACRO__(0x4907, ## __VA_ARGS__),	\
-	MACRO__(0x4908, ## __VA_ARGS__),	\
-	MACRO__(0x4909, ## __VA_ARGS__)
-
-/* ADL-S */
-#define XE_ADLS_IDS(MACRO__, ...)		\
-	MACRO__(0x4680, ## __VA_ARGS__),	\
-	MACRO__(0x4682, ## __VA_ARGS__),	\
-	MACRO__(0x4688, ## __VA_ARGS__),	\
-	MACRO__(0x468A, ## __VA_ARGS__),	\
-	MACRO__(0x468B, ## __VA_ARGS__),	\
-	MACRO__(0x4690, ## __VA_ARGS__),	\
-	MACRO__(0x4692, ## __VA_ARGS__),	\
-	MACRO__(0x4693, ## __VA_ARGS__)
-
-/* ADL-P */
-#define XE_ADLP_IDS(MACRO__, ...)		\
-	MACRO__(0x46A0, ## __VA_ARGS__),	\
-	MACRO__(0x46A1, ## __VA_ARGS__),	\
-	MACRO__(0x46A2, ## __VA_ARGS__),	\
-	MACRO__(0x46A3, ## __VA_ARGS__),	\
-	MACRO__(0x46A6, ## __VA_ARGS__),	\
-	MACRO__(0x46A8, ## __VA_ARGS__),	\
-	MACRO__(0x46AA, ## __VA_ARGS__),	\
-	MACRO__(0x462A, ## __VA_ARGS__),	\
-	MACRO__(0x4626, ## __VA_ARGS__),	\
-	MACRO__(0x4628, ## __VA_ARGS__),	\
-	MACRO__(0x46B0, ## __VA_ARGS__),	\
-	MACRO__(0x46B1, ## __VA_ARGS__),	\
-	MACRO__(0x46B2, ## __VA_ARGS__),	\
-	MACRO__(0x46B3, ## __VA_ARGS__),	\
-	MACRO__(0x46C0, ## __VA_ARGS__),	\
-	MACRO__(0x46C1, ## __VA_ARGS__),	\
-	MACRO__(0x46C2, ## __VA_ARGS__),	\
-	MACRO__(0x46C3, ## __VA_ARGS__)
-
-/* ADL-N */
-#define XE_ADLN_IDS(MACRO__, ...)		\
-	MACRO__(0x46D0, ## __VA_ARGS__),	\
-	MACRO__(0x46D1, ## __VA_ARGS__),	\
-	MACRO__(0x46D2, ## __VA_ARGS__),	\
-	MACRO__(0x46D3, ## __VA_ARGS__),	\
-	MACRO__(0x46D4, ## __VA_ARGS__)
-
-/* RPL-S */
-#define XE_RPLS_IDS(MACRO__, ...)		\
-	MACRO__(0xA780, ## __VA_ARGS__),	\
-	MACRO__(0xA781, ## __VA_ARGS__),	\
-	MACRO__(0xA782, ## __VA_ARGS__),	\
-	MACRO__(0xA783, ## __VA_ARGS__),	\
-	MACRO__(0xA788, ## __VA_ARGS__),	\
-	MACRO__(0xA789, ## __VA_ARGS__),	\
-	MACRO__(0xA78A, ## __VA_ARGS__),	\
-	MACRO__(0xA78B, ## __VA_ARGS__)
-
-/* RPL-U */
-#define XE_RPLU_IDS(MACRO__, ...)		\
-	MACRO__(0xA721, ## __VA_ARGS__),	\
-	MACRO__(0xA7A1, ## __VA_ARGS__),	\
-	MACRO__(0xA7A9, ## __VA_ARGS__),	\
-	MACRO__(0xA7AC, ## __VA_ARGS__),	\
-	MACRO__(0xA7AD, ## __VA_ARGS__)
-
-/* RPL-P */
-#define XE_RPLP_IDS(MACRO__, ...)		\
-	MACRO__(0xA720, ## __VA_ARGS__),	\
-	MACRO__(0xA7A0, ## __VA_ARGS__),	\
-	MACRO__(0xA7A8, ## __VA_ARGS__),	\
-	MACRO__(0xA7AA, ## __VA_ARGS__),	\
-	MACRO__(0xA7AB, ## __VA_ARGS__)
-
-/* DG2 */
-#define XE_DG2_G10_IDS(MACRO__, ...)		\
-	MACRO__(0x5690, ## __VA_ARGS__),	\
-	MACRO__(0x5691, ## __VA_ARGS__),	\
-	MACRO__(0x5692, ## __VA_ARGS__),	\
-	MACRO__(0x56A0, ## __VA_ARGS__),	\
-	MACRO__(0x56A1, ## __VA_ARGS__),	\
-	MACRO__(0x56A2, ## __VA_ARGS__),	\
-	MACRO__(0x56BE, ## __VA_ARGS__),	\
-	MACRO__(0x56BF, ## __VA_ARGS__)
-
-#define XE_DG2_G11_IDS(MACRO__, ...)		\
-	MACRO__(0x5693, ## __VA_ARGS__),	\
-	MACRO__(0x5694, ## __VA_ARGS__),	\
-	MACRO__(0x5695, ## __VA_ARGS__),	\
-	MACRO__(0x56A5, ## __VA_ARGS__),	\
-	MACRO__(0x56A6, ## __VA_ARGS__),	\
-	MACRO__(0x56B0, ## __VA_ARGS__),	\
-	MACRO__(0x56B1, ## __VA_ARGS__),	\
-	MACRO__(0x56BA, ## __VA_ARGS__),	\
-	MACRO__(0x56BB, ## __VA_ARGS__),	\
-	MACRO__(0x56BC, ## __VA_ARGS__),	\
-	MACRO__(0x56BD, ## __VA_ARGS__)
-
-#define XE_DG2_G12_IDS(MACRO__, ...)		\
-	MACRO__(0x5696, ## __VA_ARGS__),	\
-	MACRO__(0x5697, ## __VA_ARGS__),	\
-	MACRO__(0x56A3, ## __VA_ARGS__),	\
-	MACRO__(0x56A4, ## __VA_ARGS__),	\
-	MACRO__(0x56B2, ## __VA_ARGS__),	\
-	MACRO__(0x56B3, ## __VA_ARGS__)
-
-#define XE_DG2_IDS(MACRO__, ...)		\
-	XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\
-	XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\
-	XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
-
-#define XE_ATS_M150_IDS(MACRO__, ...)		\
-	MACRO__(0x56C0, ## __VA_ARGS__),	\
-	MACRO__(0x56C2, ## __VA_ARGS__)
-
-#define XE_ATS_M75_IDS(MACRO__, ...)		\
-	MACRO__(0x56C1, ## __VA_ARGS__)
-
-#define XE_ATS_M_IDS(MACRO__, ...)		\
-	XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\
-	XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
-
-/* ARL */
-#define XE_ARL_IDS(MACRO__, ...)		\
-	MACRO__(0x7D41, ## __VA_ARGS__),	\
-	MACRO__(0x7D51, ## __VA_ARGS__),        \
-	MACRO__(0x7D67, ## __VA_ARGS__),	\
-	MACRO__(0x7DD1, ## __VA_ARGS__),	\
-	MACRO__(0xB640, ## __VA_ARGS__)
-
-/* MTL */
-#define XE_MTL_IDS(MACRO__, ...)		\
-	MACRO__(0x7D40, ## __VA_ARGS__),	\
-	MACRO__(0x7D45, ## __VA_ARGS__),	\
-	MACRO__(0x7D55, ## __VA_ARGS__),	\
-	MACRO__(0x7D60, ## __VA_ARGS__),	\
-	MACRO__(0x7DD5, ## __VA_ARGS__)
-
-/* PVC */
-#define XE_PVC_IDS(MACRO__, ...)		\
-	MACRO__(0x0B69, ## __VA_ARGS__),	\
-	MACRO__(0x0B6E, ## __VA_ARGS__),	\
-	MACRO__(0x0BD4, ## __VA_ARGS__),	\
-	MACRO__(0x0BD5, ## __VA_ARGS__),	\
-	MACRO__(0x0BD6, ## __VA_ARGS__),	\
-	MACRO__(0x0BD7, ## __VA_ARGS__),	\
-	MACRO__(0x0BD8, ## __VA_ARGS__),	\
-	MACRO__(0x0BD9, ## __VA_ARGS__),	\
-	MACRO__(0x0BDA, ## __VA_ARGS__),	\
-	MACRO__(0x0BDB, ## __VA_ARGS__),	\
-	MACRO__(0x0BE0, ## __VA_ARGS__),	\
-	MACRO__(0x0BE1, ## __VA_ARGS__),	\
-	MACRO__(0x0BE5, ## __VA_ARGS__)
-
-#define XE_LNL_IDS(MACRO__, ...) \
-	MACRO__(0x6420, ## __VA_ARGS__), \
-	MACRO__(0x64A0, ## __VA_ARGS__), \
-	MACRO__(0x64B0, ## __VA_ARGS__)
-
-#define XE_BMG_IDS(MACRO__, ...) \
-	MACRO__(0xE202, ## __VA_ARGS__), \
-	MACRO__(0xE20B, ## __VA_ARGS__), \
-	MACRO__(0xE20C, ## __VA_ARGS__), \
-	MACRO__(0xE20D, ## __VA_ARGS__), \
-	MACRO__(0xE212, ## __VA_ARGS__)
-
-#define XE_PTL_IDS(MACRO__, ...) \
-	MACRO__(0xB080, ## __VA_ARGS__), \
-	MACRO__(0xB081, ## __VA_ARGS__), \
-	MACRO__(0xB082, ## __VA_ARGS__), \
-	MACRO__(0xB090, ## __VA_ARGS__), \
-	MACRO__(0xB091, ## __VA_ARGS__), \
-	MACRO__(0xB092, ## __VA_ARGS__), \
-	MACRO__(0xB0A0, ## __VA_ARGS__), \
-	MACRO__(0xB0A1, ## __VA_ARGS__), \
-	MACRO__(0xB0A2, ## __VA_ARGS__), \
-	MACRO__(0xB0B0, ## __VA_ARGS__)
-
-#endif
-- 
2.51.0


From 155c77f45f63dd58a37eeb0896b0b140ab785836 Mon Sep 17 00:00:00 2001
From: Maciej Patelczyk <maciej.patelczyk@intel.com>
Date: Wed, 11 Dec 2024 12:17:26 +0100
Subject: [PATCH 07/16] drm/xe: introduce xe_gt_reset and xe_gt_wait_for_reset

Add synchronous version gt reset as there are few places where it
is expected.
Also add a wait helper to wait until gt reset is done.

Signed-off-by: Maciej Patelczyk <maciej.patelczyk@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Fixes: f3bc5bb4d53d ("drm/xe: Allow userspace to configure CCS mode")
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241211111727.1481476-2-maciej.patelczyk@intel.com
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c   |  7 +++----
 drivers/gpu/drm/xe/tests/xe_mocs.c |  3 +--
 drivers/gpu/drm/xe/xe_gt.h         | 25 +++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_debugfs.c |  4 +---
 4 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 405ff904153e..6795d1d916e4 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -264,10 +264,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 		 * however seems quite fragile not to also restart the GT. Try
 		 * to do that here by triggering a GT reset.
 		 */
-		for_each_gt(__gt, xe, id) {
-			xe_gt_reset_async(__gt);
-			flush_work(&__gt->reset.worker);
-		}
+		for_each_gt(__gt, xe, id)
+			xe_gt_reset(__gt);
+
 		if (err) {
 			KUNIT_FAIL(test, "restore kernel err=%pe\n",
 				   ERR_PTR(err));
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index d3f71d13eb81..ef1e5256c56a 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -162,8 +162,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
 		if (flags & HAS_LNCF_MOCS)
 			read_l3cc_table(gt, &mocs.table);
 
-		xe_gt_reset_async(gt);
-		flush_work(&gt->reset.worker);
+		xe_gt_reset(gt);
 
 		kunit_info(test, "mocs_reset_test after reset\n");
 		if (flags & HAS_GLOBAL_MOCS)
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 4e4e8e103419..e504cc33ade4 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -56,6 +56,31 @@ void xe_gt_sanitize(struct xe_gt *gt);
 int xe_gt_sanitize_freq(struct xe_gt *gt);
 void xe_gt_remove(struct xe_gt *gt);
 
+/**
+ * xe_gt_wait_for_reset - wait for gt's async reset to finalize.
+ * @gt: GT structure
+ * Return:
+ * %true if it waited for the work to finish execution,
+ * %false if there was no scheduled reset or it was done.
+ */
+static inline bool xe_gt_wait_for_reset(struct xe_gt *gt)
+{
+	return flush_work(&gt->reset.worker);
+}
+
+/**
+ * xe_gt_reset - perform synchronous reset
+ * @gt: GT structure
+ * Return:
+ * %true if it waited for the reset to finish,
+ * %false if there was no scheduled reset.
+ */
+static inline bool xe_gt_reset(struct xe_gt *gt)
+{
+	xe_gt_reset_async(gt);
+	return xe_gt_wait_for_reset(gt);
+}
+
 /**
  * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
  * first that matches the same reset domain as @class
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 3e8c351a0eab..e7792858b1e4 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -132,11 +132,9 @@ static int force_reset(struct xe_gt *gt, struct drm_printer *p)
 static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p)
 {
 	xe_pm_runtime_get(gt_to_xe(gt));
-	xe_gt_reset_async(gt);
+	xe_gt_reset(gt);
 	xe_pm_runtime_put(gt_to_xe(gt));
 
-	flush_work(&gt->reset.worker);
-
 	return 0;
 }
 
-- 
2.51.0


From 480fb9806e2e073532f7786166287114c696b340 Mon Sep 17 00:00:00 2001
From: Maciej Patelczyk <maciej.patelczyk@intel.com>
Date: Wed, 11 Dec 2024 12:17:27 +0100
Subject: [PATCH 08/16] drm/xe: make change ccs_mode a synchronous action

If ccs_mode is being modified via
   /sys/class/drm/cardX/device/tileY/gtY/ccs_mode
the asynchronous reset is triggered and the write returns immediately.

With that some test receive false information about number of CCS engines
or even fail if they proceed without delay after changing the ccs_mode.

Changing the ccs_mode change from async to sync to prevent failures in
tests.

Signed-off-by: Maciej Patelczyk <maciej.patelczyk@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Fixes: f3bc5bb4d53d ("drm/xe: Allow userspace to configure CCS mode")
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241211111727.1481476-3-maciej.patelczyk@intel.com
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index b6adfb9f2030..50fffc9ebf62 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -150,7 +150,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
 		xe_gt_info(gt, "Setting compute mode to %d\n", num_engines);
 		gt->ccs_mode = num_engines;
 		xe_gt_record_user_engines(gt);
-		xe_gt_reset_async(gt);
+		xe_gt_reset(gt);
 	}
 
 	mutex_unlock(&xe->drm.filelist_mutex);
-- 
2.51.0


From 92029e0baa5313ba208103f90086f59070bbf93b Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Wed, 8 Jan 2025 15:13:23 +0100
Subject: [PATCH 09/16] drm/xe/ptl: Apply Wa_14023061436

Enable WMTP for the BTD kernel to address Wa14023061436 by setting the
proper TDL Chicken Bit.

v2: Apply it on engine_was[] as this register is not part of LRC(Matt)
    Apply it for first_render_or_compute in case this gets extended to
    compute only platforms(Matt).

Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250108141323.311601-1-nirmoy.das@intel.com
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++
 drivers/gpu/drm/xe/xe_wa.c           | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 162f18e975da..b4283ac030f4 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -500,6 +500,9 @@
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
 #define   DIS_ATOMIC_CHAINING_TYPED_WRITES	REG_BIT(3)
 
+#define TDL_CHICKEN				XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED)
+#define   QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE	REG_BIT(12)
+
 #define LSC_CHICKEN_BIT_0			XE_REG_MCR(0xe7c8)
 #define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
 #define   WR_REQ_CHAINING_DIS			REG_BIT(26)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 570fe0376402..744dba4fdb58 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -613,6 +613,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER,
 				   SMP_FORCE_128B_OVERFETCH))
 	},
+	{ XE_RTP_NAME("14023061436"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE))
+	},
 
 	{}
 };
-- 
2.51.0


From c26f22dac3449d8a687237cdfc59a6445eb8f75a Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Fri, 10 Jan 2025 18:15:39 -0800
Subject: [PATCH 10/16] drm/xe/oa: Add missing VISACTL mux registers

Add missing VISACTL mux registers required for some OA
config's (e.g. RenderPipeCtrl).

Fixes: cdf02fe1a94a ("drm/xe/oa/uapi: Add/remove OA config perf ops")
Cc: stable@vger.kernel.org
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250111021539.2920346-1-ashutosh.dixit@intel.com
---
 drivers/gpu/drm/xe/xe_oa.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 4e00a77289c5..eeb96b5f49e2 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -2163,6 +2163,7 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = {
 	{ .start = 0x5194, .end = 0x5194 },	/* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */
 	{ .start = 0x8704, .end = 0x8704 },	/* LMEM_LAT_MEASURE_MCFG_GRP */
 	{ .start = 0xB1BC, .end = 0xB1BC },	/* L3_BANK_LAT_MEASURE_LBCF_GFX */
+	{ .start = 0xD0E0, .end = 0xD0F4 },	/* VISACTL */
 	{ .start = 0xE18C, .end = 0xE18C },	/* SAMPLER_MODE */
 	{ .start = 0xE590, .end = 0xE590 },	/* TDL_LSC_LAT_MEASURE_TDL_GFX */
 	{ .start = 0x13000, .end = 0x137FC },	/* PES_0_PESL0 - PES_63_UPPER_PESL3 */
-- 
2.51.0


From d160dc6f53914d729be7fcb7afbd0e9e6a3725b2 Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Fri, 10 Jan 2025 09:33:09 -0800
Subject: [PATCH 11/16] drm/xe: Add locks in gtidle code

The update of the residency values needs to be protected by a lock to
avoid multiple entrypoints, for example when multiple userspace clients
read the sysfs file. Other in-kernel clients are going to be added to
sample these values, making the problem worse. Protect those updates
with a raw_spinlock so it can be called by future integration with perf
pmu.

Suggested-by: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250110173308.2412232-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_idle.c       | 23 ++++++++++++++++++++---
 drivers/gpu/drm/xe/xe_gt_idle.h       |  1 +
 drivers/gpu/drm/xe/xe_gt_idle_types.h |  3 +++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index ffd3ba7f6656..fbbace7b0b12 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -69,6 +69,8 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency)
 {
 	u64 delta, overflow_residency, prev_residency;
 
+	lockdep_assert_held(&gtidle->lock);
+
 	overflow_residency = BIT_ULL(32);
 
 	/*
@@ -275,8 +277,21 @@ static ssize_t idle_status_show(struct device *dev,
 
 	return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state));
 }
-static DEVICE_ATTR_RO(idle_status);
 
+u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle)
+{
+	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+	u64 residency;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&gtidle->lock, flags);
+	residency = get_residency_ms(gtidle, gtidle->idle_residency(pc));
+	raw_spin_unlock_irqrestore(&gtidle->lock, flags);
+
+	return residency;
+}
+
+static DEVICE_ATTR_RO(idle_status);
 static ssize_t idle_residency_ms_show(struct device *dev,
 				      struct device_attribute *attr, char *buff)
 {
@@ -285,10 +300,10 @@ static ssize_t idle_residency_ms_show(struct device *dev,
 	u64 residency;
 
 	xe_pm_runtime_get(pc_to_xe(pc));
-	residency = gtidle->idle_residency(pc);
+	residency = xe_gt_idle_residency_msec(gtidle);
 	xe_pm_runtime_put(pc_to_xe(pc));
 
-	return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency));
+	return sysfs_emit(buff, "%llu\n", residency);
 }
 static DEVICE_ATTR_RO(idle_residency_ms);
 
@@ -331,6 +346,8 @@ int xe_gt_idle_init(struct xe_gt_idle *gtidle)
 	if (!kobj)
 		return -ENOMEM;
 
+	raw_spin_lock_init(&gtidle->lock);
+
 	if (xe_gt_is_media_type(gt)) {
 		snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id);
 		gtidle->idle_residency = xe_guc_pc_mc6_residency;
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
index 4455a6501cb0..591a01e181bc 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -17,5 +17,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt);
 void xe_gt_idle_enable_pg(struct xe_gt *gt);
 void xe_gt_idle_disable_pg(struct xe_gt *gt);
 int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p);
+u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle);
 
 #endif /* _XE_GT_IDLE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h
index b8b297a3f884..a3667c567f8a 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h
@@ -6,6 +6,7 @@
 #ifndef _XE_GT_IDLE_SYSFS_TYPES_H_
 #define _XE_GT_IDLE_SYSFS_TYPES_H_
 
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 struct xe_guc_pc;
@@ -31,6 +32,8 @@ struct xe_gt_idle {
 	u64 cur_residency;
 	/** @prev_residency: previous residency counter */
 	u64 prev_residency;
+	/** @lock: Lock protecting idle residency counters */
+	raw_spinlock_t lock;
 	/** @idle_status: get the current idle state */
 	enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc);
 	/** @idle_residency: get idle residency counter */
-- 
2.51.0


From 3318ef9888d5b4f5c5a9473180fd0b16e9ef266d Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 6 Jan 2025 15:43:13 -0800
Subject: [PATCH 12/16] drm/xe: Remove unused "mmio_ext" code

The "mmio_ext" and 'REG_EXT" code is currently unused on any existing
platform.  Going forward, this also isn't the design we want to use for
any future platforms/features either, so we should just go ahead and
remove the dead code to avoid confusion.

mmio_ext was originally added in an attempt to hack around the early
(mis)design of the Xe driver, which used xe_gt as the target for all
register MMIO access, even those completely unrelated to the GT subunit
of the hardware.  With the introduction of commit 34953ee349dd ("drm/xe:
Create dedicated xe_mmio structure") and its follow-up patches, that
misdesign has been corrected and access to register MMIO regions
specific to hardware units is now done through xe_mmio structures which
encapsulate an iomap, region size, and some other metadata.

Although all of the registers used by the driver today happen to fall
within one specific PCI BAR region, and thus re-use a single device-wide
iomap, there's no requirement that this stay true for future platforms
or features.  I.e., if a future platform adds a new 'foo' hardware unit
that exists at a different area in the BAR, or even in a completely
different BAR, then that would be handled by doing a separate iomap of
that unit's register region and wrapping it in its own 'struct xe_mmio
foo_regs' structure.  The pointer to the new 'foo_regs' could be placed
within the xe_device, xe_tile, xe_gt, etc., according to where the new
hardware unit falls within the current hardware hierarchy.

This effectively reverts the following commits, although parts of these
commits had already vanished or changed with the earlier xe_mmio
refactor work:

 - commit 399a13323f0d ("drm/xe: add 28-bit address support in struct
   xe_reg")
 - commit fdef72e02e20 ("drm/xe: add a flag to bypass multi-tile config
   from MTCFG reg")
 - commit 866b2b176434 ("drm/xe: add MMIO extension support flags")
 - commit ef29b390c734 ("drm/xe: map MMIO BAR according to the num of
   tiles in device desc")
 - commit a4e2f3a299ea ("drm/xe: refactor xe_mmio_probe_tiles to support
   MMIO extension")

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Koby Elbaz <kelbaz@habana.ai>
Acked-by: Maciej Patelczyk <maciej.patelczyk@intel.com>
Reviewed-by: Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Stuart Summers <stuart.summers@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250106234312.2986065-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_reg_defs.h | 16 +----------
 drivers/gpu/drm/xe/xe_device_types.h  | 11 --------
 drivers/gpu/drm/xe/xe_mmio.c          | 39 ---------------------------
 drivers/gpu/drm/xe/xe_pci.c           |  3 ---
 drivers/gpu/drm/xe/xe_pci_types.h     |  2 --
 5 files changed, 1 insertion(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 0eedd6c26b1b..89716172fbb8 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -21,7 +21,7 @@ struct xe_reg {
 	union {
 		struct {
 			/** @addr: address */
-			u32 addr:28;
+			u32 addr:22;
 			/**
 			 * @masked: register is "masked", with upper 16bits used
 			 * to identify the bits that are updated on the lower
@@ -41,10 +41,6 @@ struct xe_reg {
 			 * @vf: register is accessible from the Virtual Function.
 			 */
 			u32 vf:1;
-			/**
-			 * @ext: access MMIO extension space for current register.
-			 */
-			u32 ext:1;
 		};
 		/** @raw: Raw value with both address and options */
 		u32 raw;
@@ -111,16 +107,6 @@ struct xe_reg_mcr {
  */
 #define XE_REG(r_, ...)		((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__))
 
-/**
- * XE_REG_EXT - Create a struct xe_reg from extension offset and additional
- * flags
- * @r_: Register extension offset
- * @...: Additional options like access mode. See struct xe_reg for available
- *       options.
- */
-#define XE_REG_EXT(r_, ...)	\
-	((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .ext = 1))
-
 /**
  * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags
  * @r_: Register offset
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 8a7b15972413..16ebb2859877 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -186,13 +186,6 @@ struct xe_tile {
 	 */
 	struct xe_mmio mmio;
 
-	/**
-	 * @mmio_ext: MMIO-extension info for a tile.
-	 *
-	 * Each tile has its own additional 256MB (28-bit) MMIO-extension space.
-	 */
-	struct xe_mmio mmio_ext;
-
 	/** @mem: memory management info for tile */
 	struct {
 		/**
@@ -263,8 +256,6 @@ struct xe_device {
 		const char *graphics_name;
 		/** @info.media_name: media IP name */
 		const char *media_name;
-		/** @info.tile_mmio_ext_size: size of MMIO extension space, per-tile */
-		u32 tile_mmio_ext_size;
 		/** @info.graphics_verx100: graphics IP version */
 		u32 graphics_verx100;
 		/** @info.media_verx100: media IP version */
@@ -314,8 +305,6 @@ struct xe_device {
 		u8 has_heci_gscfi:1;
 		/** @info.has_llc: Device has a shared CPU+GPU last level cache */
 		u8 has_llc:1;
-		/** @info.has_mmio_ext: Device has extra MMIO address range */
-		u8 has_mmio_ext:1;
 		/** @info.has_range_tlb_invalidation: Has range based TLB invalidations */
 		u8 has_range_tlb_invalidation:1;
 		/** @info.has_sriov: Supports SR-IOV */
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index a48f239cad1c..d321a21aacf0 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -103,50 +103,11 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
 	}
 }
 
-/*
- * On top of all the multi-tile MMIO space there can be a platform-dependent
- * extension for each tile, resulting in a layout like below:
- *
- * .----------------------. <- ext_base + tile_count * tile_mmio_ext_size
- * |         ....         |
- * |----------------------| <- ext_base + 2 * tile_mmio_ext_size
- * | tile1->mmio_ext.regs |
- * |----------------------| <- ext_base + 1 * tile_mmio_ext_size
- * | tile0->mmio_ext.regs |
- * |======================| <- ext_base = tile_count * tile_mmio_size
- * |                      |
- * |       mmio.regs      |
- * |                      |
- * '----------------------' <- 0MB
- *
- * Set up the tile[]->mmio_ext pointers/sizes.
- */
-static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size,
-				 size_t tile_mmio_ext_size)
-{
-	struct xe_tile *tile;
-	void __iomem *regs;
-	u8 id;
-
-	if (!xe->info.has_mmio_ext)
-		return;
-
-	regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count;
-	for_each_tile(tile, xe, id) {
-		tile->mmio_ext.regs_size = tile_mmio_ext_size;
-		tile->mmio_ext.regs = regs;
-		tile->mmio_ext.tile = tile;
-		regs += tile_mmio_ext_size;
-	}
-}
-
 int xe_mmio_probe_tiles(struct xe_device *xe)
 {
 	size_t tile_mmio_size = SZ_16M;
-	size_t tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
 
 	mmio_multi_tile_setup(xe, tile_mmio_size);
-	mmio_extension_setup(xe, tile_mmio_size, tile_mmio_ext_size);
 
 	return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe);
 }
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 39be74848e44..48d1c81d441e 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -61,7 +61,6 @@ struct xe_device_desc {
 	u8 has_heci_gscfi:1;
 	u8 has_heci_cscfi:1;
 	u8 has_llc:1;
-	u8 has_mmio_ext:1;
 	u8 has_sriov:1;
 	u8 skip_guc_pc:1;
 	u8 skip_mtcfg:1;
@@ -617,7 +616,6 @@ static int xe_info_init_early(struct xe_device *xe,
 	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
 	xe->info.has_heci_cscfi = desc->has_heci_cscfi;
 	xe->info.has_llc = desc->has_llc;
-	xe->info.has_mmio_ext = desc->has_mmio_ext;
 	xe->info.has_sriov = desc->has_sriov;
 	xe->info.skip_guc_pc = desc->skip_guc_pc;
 	xe->info.skip_mtcfg = desc->skip_mtcfg;
@@ -677,7 +675,6 @@ static int xe_info_init(struct xe_device *xe,
 
 	xe->info.graphics_name = graphics_desc->name;
 	xe->info.media_name = media_desc ? media_desc->name : "none";
-	xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size;
 
 	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
 	xe->info.vram_flags = graphics_desc->vram_flags;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 79b0f80376a4..873efec5cdee 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -20,8 +20,6 @@ struct xe_graphics_desc {
 
 	u64 hw_engine_mask;	/* hardware engines provided by graphics IP */
 
-	u32 tile_mmio_ext_size; /* size of MMIO extension space, per-tile */
-
 	u8 max_remote_tiles:2;
 
 	u8 has_asid:1;
-- 
2.51.0


From 0af944f0e3082ff517958b1cea76fb9b8cb379dd Mon Sep 17 00:00:00 2001
From: Oak Zeng <oak.zeng@intel.com>
Date: Fri, 10 Jan 2025 16:01:37 -0500
Subject: [PATCH 13/16] drm/xe: Reject BO eviction if BO is bound to current VM
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This is a follow up fix for
https://patchwork.freedesktop.org/patch/msgid/20241203021929.1919730-1-oak.zeng@intel.com
The overall goal is to fail vm_bind when there is memory pressure. See more
details in the commit message of above patch. Abbove patch fixes the issue
when user pass in a vm_id parameter during gem_create. If user doesn't pass
in a vm_id during gem_create, above patch doesn't help.

This patch further reject BO eviction (which could be triggered by bo validation)
if BO is bound to the current VM. vm_bind could fail due to the eviction failure.
The BO to VM reverse mapping structure is used to determine whether BO is bound
to VM.

v2:
Move vm_bo definition from function scope to if(evict) clause (Thomas)
Further constraint the condition by adding ctx->resv (Thomas)
Add a short comment describe the change.

Suggested-by: Thomas HellstrÃ¶m <thomas.hellstrom@linux.intel.com>
Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Reviewed-by: Thomas HellstrÃ¶m <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas HellstrÃ¶m <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250110210137.3181576-1-oak.zeng@intel.com
---
 drivers/gpu/drm/xe/xe_bo.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3f5391d416d4..4f077c11e8e2 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -713,6 +713,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		goto out;
 	}
 
+	/* Reject BO eviction if BO is bound to current VM. */
+	if (evict && ctx->resv) {
+		struct drm_gpuvm_bo *vm_bo;
+
+		drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) {
+			struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+
+			if (xe_vm_resv(vm) == ctx->resv &&
+			    xe_vm_in_preempt_fence_mode(vm)) {
+				ret = -EBUSY;
+				goto out;
+			}
+		}
+	}
+
 	/*
 	 * Failed multi-hop where the old_mem is still marked as
 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
-- 
2.51.0


From aaab5404b16f19b06c7d88787d7ba18d91eeb854 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 15 Jan 2025 09:50:52 -0500
Subject: [PATCH 14/16] drm/xe: Introduce GuC PC debugfs

Allows the visualization of the current GuC power conservation
status and policies.

v2: Fix DCC msg (Vinay)
v3: Simplify pc_get_state_string (Jonathan)

Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250115145053.1142023-1-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_debugfs.c | 15 ++++++++
 drivers/gpu/drm/xe/xe_guc_pc.c      | 59 +++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_guc_pc.h      |  2 +
 3 files changed, 76 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index 995b306aced7..0aff1d462bc0 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -13,6 +13,7 @@
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_log.h"
+#include "xe_guc_pc.h"
 #include "xe_macros.h"
 #include "xe_pm.h"
 
@@ -60,10 +61,24 @@ static int guc_ctb(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int guc_pc(struct seq_file *m, void *data)
+{
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_pm_runtime_get(xe);
+	xe_guc_pc_print(&guc->pc, &p);
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"guc_info", guc_info, 0},
 	{"guc_log", guc_log, 0},
 	{"guc_ctb", guc_ctb, 0},
+	{"guc_pc", guc_pc, 0},
 };
 
 void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index df7f130fb663..43f9617baba2 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -8,6 +8,7 @@
 #include <linux/delay.h>
 
 #include <drm/drm_managed.h>
+#include <drm/drm_print.h>
 #include <generated/xe_wa_oob.h>
 
 #include "abi/guc_actions_slpc_abi.h"
@@ -1131,3 +1132,61 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 
 	return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc);
 }
+
+static const char *pc_get_state_string(struct xe_guc_pc *pc)
+{
+	switch (slpc_shared_data_read(pc, header.global_state)) {
+	case SLPC_GLOBAL_STATE_NOT_RUNNING:
+		return "not running";
+	case SLPC_GLOBAL_STATE_INITIALIZING:
+		return "initializing";
+	case SLPC_GLOBAL_STATE_RESETTING:
+		return "resetting";
+	case SLPC_GLOBAL_STATE_RUNNING:
+		return "running";
+	case SLPC_GLOBAL_STATE_SHUTTING_DOWN:
+		return "shutting down";
+	case SLPC_GLOBAL_STATE_ERROR:
+		return "error";
+	default:
+		return "unknown";
+	}
+}
+
+/**
+ * xe_guc_pc_print - Print GuC's Power Conservation information for debug
+ * @pc: Xe_GuC_PC instance
+ * @p: drm_printer
+ */
+void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p)
+{
+	drm_printf(p, "SLPC Shared Data Header:\n");
+	drm_printf(p, "\tSize: %x\n", slpc_shared_data_read(pc, header.size));
+	drm_printf(p, "\tGlobal State: %s\n", pc_get_state_string(pc));
+
+	if (pc_action_query_task_state(pc))
+		return;
+
+	drm_printf(p, "\nSLPC Tasks Status:\n");
+	drm_printf(p, "\tGTPERF enabled: %s\n",
+		   str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+			      SLPC_GTPERF_TASK_ENABLED));
+	drm_printf(p, "\tDCC enabled: %s\n",
+		   str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+			      SLPC_DCC_TASK_ENABLED));
+	drm_printf(p, "\tDCC in use: %s\n",
+		   str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+			      SLPC_IN_DCC));
+	drm_printf(p, "\tBalancer enabled: %s\n",
+		   str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+			      SLPC_BALANCER_ENABLED));
+	drm_printf(p, "\tIBC enabled: %s\n",
+		   str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+			      SLPC_IBC_TASK_ENABLED));
+	drm_printf(p, "\tBalancer IA LMT enabled: %s\n",
+		   str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+			      SLPC_BALANCER_IA_LMT_ENABLED));
+	drm_printf(p, "\tBalancer IA LMT active: %s\n",
+		   str_yes_no(slpc_shared_data_read(pc, task_state_data.status) &
+			      SLPC_BALANCER_IA_LMT_ACTIVE));
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 619f59cd633c..39102b79602f 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -10,6 +10,7 @@
 
 struct xe_guc_pc;
 enum slpc_gucrc_mode;
+struct drm_printer;
 
 int xe_guc_pc_init(struct xe_guc_pc *pc);
 int xe_guc_pc_start(struct xe_guc_pc *pc);
@@ -17,6 +18,7 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc);
 int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
 int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode);
 int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc);
+void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p);
 
 u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
 int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
-- 
2.51.0


From 50554bf3e56dd0c78ef1eedb685d0ab36c9c9987 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 15 Jan 2025 09:50:53 -0500
Subject: [PATCH 15/16] drm/xe/lnl: Enable GuC SLPC DCC task

Enable DCC (Duty Cycle Control) in Lunar Lake.

DCC is the SLPC task that tries to keep
the GT from operating inefficiently when thermally constrained.

Although the recommendation is to enable it, LNL GuC is leaving
it disabled by default on LNL.

It would minimize the GT frequency oscillation on throttled
scenarios, which could potentially reduce latencies.

v2: Move set_policies call after wait for running state, so
    we ensure it is not overwritten. (Vinay)
v3: Fix English in the commit message (Jonathan)
v4: Also set disable to 0 so DCC can really get into effect.
v5: Avoid lnl_ prefix (Vinay)
v6: Finish renaming...

Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com> #v3
Link: https://patchwork.freedesktop.org/patch/msgid/20250115145053.1142023-2-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 43f9617baba2..44b5211066ef 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -993,6 +993,27 @@ out:
 	return ret;
 }
 
+static int slpc_enable_dcc(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	ret = pc_action_set_param(pc, SLPC_PARAM_TASK_ENABLE_DCC, 1);
+	if (ret)
+		return ret;
+
+	return pc_action_set_param(pc, SLPC_PARAM_TASK_DISABLE_DCC, 0);
+}
+
+static int slpc_set_policies(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+
+	if (xe->info.platform == XE_LUNARLAKE)
+		return slpc_enable_dcc(pc);
+
+	return 0;
+}
+
 /**
  * xe_guc_pc_start - Start GuC's Power Conservation component
  * @pc: Xe_GuC_PC instance
@@ -1037,6 +1058,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 		goto out;
 	}
 
+	ret = slpc_set_policies(pc);
+	if (ret)
+		goto out;
+
 	ret = pc_init_freqs(pc);
 	if (ret)
 		goto out;
-- 
2.51.0


From 11a64adcdbcc3028b96e440bc33fa76e2e825c10 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 14 Jan 2025 12:38:53 -0800
Subject: [PATCH 16/16] drm/xe/xe3: Generate and store the L3 bank mask

On Xe3, the register used to indicate which L3 banks are enabled on
the system is a new one called MIRROR_L3BANK_ENABLE. Each bit
represents one bank enabled in each node.
Extend the existing topology code for Xe3 to read this register and
generate the correct L3 bank mask, which can be read by user space
throug the topology query.

Bspec: 72573, 73439
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250114203853.35055-1-matthew.s.atwood@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  3 +++
 drivers/gpu/drm/xe/xe_gt_topology.c  | 16 +++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index b4283ac030f4..096859072396 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -221,6 +221,9 @@
 
 #define MIRROR_FUSE1				XE_REG(0x911c)
 
+#define MIRROR_L3BANK_ENABLE			XE_REG(0x9130)
+#define   XE3_L3BANK_ENABLE			REG_GENMASK(31, 0)
+
 #define XELP_EU_ENABLE				XE_REG(0x9134)	/* "_DISABLE" on Xe_LP */
 #define   XELP_EU_MASK				REG_GENMASK(7, 0)
 #define XELP_GT_SLICE_ENABLE			XE_REG(0x9138)
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index df2042db7ee6..516c81e3b8dd 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -129,7 +129,8 @@ static void
 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
 {
 	struct xe_device *xe = gt_to_xe(gt);
-	u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
+	struct xe_mmio *mmio = &gt->mmio;
+	u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);
 
 	/*
 	 * PTL platforms with media version 30.00 do not provide proper values
@@ -143,7 +144,16 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
 	if (XE_WA(gt, no_media_l3))
 		return;
 
-	if (GRAPHICS_VER(xe) >= 20) {
+	if (GRAPHICS_VER(xe) >= 30) {
+		xe_l3_bank_mask_t per_node = {};
+		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
+		u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
+		u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable);
+
+		bitmap_from_arr32(per_node, &bank_val, 32);
+		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32,
+					 meml3_en);
+	} else if (GRAPHICS_VER(xe) >= 20) {
 		xe_l3_bank_mask_t per_node = {};
 		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
 		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
@@ -155,7 +165,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
 		xe_l3_bank_mask_t per_node = {};
 		xe_l3_bank_mask_t per_mask_bit = {};
 		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
-		u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
+		u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4);
 		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
 
 		bitmap_set_value8(per_mask_bit, 0x3, 0);
-- 
2.51.0