From 3034cc8107b8d0c7d1b56584394e215dab57f8a3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Aug 2024 15:06:21 -0700 Subject: [PATCH 01/16] drm/xe/pcode: Treat pcode as per-tile rather than per-GT There's only one instance of the pcode per tile, and for GT-related accesses both the primary and media GT share the same register interface. Since Xe was using per-GT locking, the pcode mutex wasn't actually protecting everything that it should since concurrent accesses related to a tile's primary GT and media GT were possible. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240829220619.789159-5-matthew.d.roper@intel.com --- .../drm/xe/compat-i915-headers/intel_pcode.h | 8 +- .../drm/xe/compat-i915-headers/intel_uncore.h | 7 ++ drivers/gpu/drm/xe/xe_device_types.h | 6 + drivers/gpu/drm/xe/xe_gt.c | 2 - drivers/gpu/drm/xe/xe_gt_types.h | 6 - drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- drivers/gpu/drm/xe/xe_hwmon.c | 4 +- drivers/gpu/drm/xe/xe_pcode.c | 104 +++++++++--------- drivers/gpu/drm/xe/xe_pcode.h | 16 +-- drivers/gpu/drm/xe/xe_tile.c | 3 + drivers/gpu/drm/xe/xe_vram_freq.c | 6 +- 11 files changed, 85 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h index 0c47661bdc6a..a473aa6697d0 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -13,7 +13,7 @@ static inline int snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, int fast_timeout_us, int slow_timeout_ms) { - return xe_pcode_write_timeout(__compat_uncore_to_gt(uncore), mbox, val, + return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val, slow_timeout_ms ?: 1); } @@ -21,13 +21,13 @@ static inline int snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) { - return xe_pcode_write(__compat_uncore_to_gt(uncore), mbox, val); + return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val); } static inline int snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) { - return xe_pcode_read(__compat_uncore_to_gt(uncore), mbox, val, val1); + return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1); } static inline int @@ -35,7 +35,7 @@ skl_pcode_request(struct intel_uncore *uncore, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms) { - return xe_pcode_request(__compat_uncore_to_gt(uncore), mbox, request, reply_mask, reply, + return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply, timeout_base_ms); } diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 083c4da2ea41..eb5b5f0e4bd9 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -17,6 +17,13 @@ static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore) return xe_root_mmio_gt(xe); } +static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + + return xe_device_get_root_tile(xe); +} + static inline u32 intel_uncore_read(struct intel_uncore *uncore, i915_reg_t i915_reg) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 856db4020048..9c2ae57ecf99 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -208,6 +208,12 @@ struct xe_tile { } vf; } sriov; + /** @pcode: tile's PCODE */ + struct { + /** @pcode.lock: protecting tile's PCODE mailbox data */ + struct mutex lock; + } pcode; + /** @migrate: Migration helper for vram blits and clearing */ struct xe_migrate *migrate; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index bb612f089e49..ffacddb587b2 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -46,7 +46,6 @@ #include "xe_migrate.h" #include "xe_mmio.h" #include "xe_pat.h" -#include "xe_pcode.h" #include "xe_pm.h" #include "xe_mocs.h" #include "xe_reg_sr.h" @@ -386,7 +385,6 @@ int xe_gt_init_early(struct xe_gt *gt) xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); - xe_pcode_init(gt); spin_lock_init(>->global_invl_lock); return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 31946d7fe701..3d1c51de0268 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -329,12 +329,6 @@ struct xe_gt { /** @eclass: per hardware engine class interface on the GT */ struct xe_hw_engine_class_intf eclass[XE_ENGINE_CLASS_MAX]; - /** @pcode: GT's PCODE */ - struct { - /** @pcode.lock: protecting GT's PCODE mailbox data */ - struct mutex lock; - } pcode; - /** @sysfs: sysfs' kobj used by xe_gt_sysfs */ struct kobject *sysfs; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index def503abeed5..034b29984d5e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -915,7 +915,7 @@ static void pc_init_pcode_freq(struct xe_guc_pc *pc) u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER); - XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max)); + XE_WARN_ON(xe_pcode_init_min_freq_table(gt_to_tile(pc_to_gt(pc)), min, max)); } static int pc_init_freqs(struct xe_guc_pc *pc) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 1faeca70900e..98e3ec08279e 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -441,14 +441,14 @@ static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) if (gt_to_xe(gt)->info.platform == XE_DG2) return -ENXIO; - return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, + return xe_pcode_read(gt_to_tile(gt), PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0), uval, NULL); } static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) { - return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP, + return xe_pcode_write(gt_to_tile(gt), PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), (uval & POWER_SETUP_I1_DATA_MASK)); } diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9c4eefdf6642..7397d556996a 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -12,7 +12,6 @@ #include "xe_assert.h" #include "xe_device.h" -#include "xe_gt.h" #include "xe_mmio.h" #include "xe_pcode_api.h" @@ -30,7 +29,7 @@ * - PCODE for display operations */ -static int pcode_mailbox_status(struct xe_gt *gt) +static int pcode_mailbox_status(struct xe_tile *tile) { u32 err; static const struct pcode_err_decode err_decode[] = { @@ -45,9 +44,9 @@ static int pcode_mailbox_status(struct xe_gt *gt) [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, }; - err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; + err = xe_mmio_read32(tile->primary_gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; if (err) { - drm_err(>_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, + drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err, err_decode[err].str ?: "Unknown"); return err_decode[err].errno ?: -EPROTO; } @@ -55,84 +54,85 @@ static int pcode_mailbox_status(struct xe_gt *gt) return 0; } -static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, +static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1, unsigned int timeout_ms, bool return_data, bool atomic) { + struct xe_gt *mmio = tile->primary_gt; int err; - if (gt_to_xe(gt)->info.skip_pcode) + if (tile_to_xe(tile)->info.skip_pcode) return 0; - if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) + if ((xe_mmio_read32(mmio, PCODE_MAILBOX) & PCODE_READY) != 0) return -EAGAIN; - xe_mmio_write32(gt, PCODE_DATA0, *data0); - xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0); - xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox); + xe_mmio_write32(mmio, PCODE_DATA0, *data0); + xe_mmio_write32(mmio, PCODE_DATA1, data1 ? *data1 : 0); + xe_mmio_write32(mmio, PCODE_MAILBOX, PCODE_READY | mbox); - err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0, + err = xe_mmio_wait32(mmio, PCODE_MAILBOX, PCODE_READY, 0, timeout_ms * USEC_PER_MSEC, NULL, atomic); if (err) return err; if (return_data) { - *data0 = xe_mmio_read32(gt, PCODE_DATA0); + *data0 = xe_mmio_read32(mmio, PCODE_DATA0); if (data1) - *data1 = xe_mmio_read32(gt, PCODE_DATA1); + *data1 = xe_mmio_read32(mmio, PCODE_DATA1); } - return pcode_mailbox_status(gt); + return pcode_mailbox_status(tile); } -static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, +static int pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1, unsigned int timeout_ms, bool return_data, bool atomic) { - if (gt_to_xe(gt)->info.skip_pcode) + if (tile_to_xe(tile)->info.skip_pcode) return 0; - lockdep_assert_held(>->pcode.lock); + lockdep_assert_held(&tile->pcode.lock); - return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic); + return __pcode_mailbox_rw(tile, mbox, data0, data1, timeout_ms, return_data, atomic); } -int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) +int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 data, int timeout) { int err; - mutex_lock(>->pcode.lock); - err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false); - mutex_unlock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, &data, NULL, timeout, false, false); + mutex_unlock(&tile->pcode.lock); return err; } -int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) +int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1) { int err; - mutex_lock(>->pcode.lock); - err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false); - mutex_unlock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, val, val1, 1, true, false); + mutex_unlock(&tile->pcode.lock); return err; } -static int pcode_try_request(struct xe_gt *gt, u32 mbox, +static int pcode_try_request(struct xe_tile *tile, u32 mbox, u32 request, u32 reply_mask, u32 reply, u32 *status, bool atomic, int timeout_us, bool locked) { int slept, wait = 10; - xe_gt_assert(gt, timeout_us > 0); + xe_tile_assert(tile, timeout_us > 0); for (slept = 0; slept < timeout_us; slept += wait) { if (locked) - *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + *status = pcode_mailbox_rw(tile, mbox, &request, NULL, 1, true, atomic); else - *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + *status = __pcode_mailbox_rw(tile, mbox, &request, NULL, 1, true, atomic); if ((*status == 0) && ((request & reply_mask) == reply)) return 0; @@ -149,7 +149,7 @@ static int pcode_try_request(struct xe_gt *gt, u32 mbox, /** * xe_pcode_request - send PCODE request until acknowledgment - * @gt: gt + * @tile: tile * @mbox: PCODE mailbox ID the request is targeted for * @request: request ID * @reply_mask: mask used to check for request acknowledgment @@ -166,17 +166,17 @@ static int pcode_try_request(struct xe_gt *gt, u32 mbox, * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some * other error as reported by PCODE. */ -int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, - u32 reply_mask, u32 reply, int timeout_base_ms) +int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) { u32 status; int ret; - xe_gt_assert(gt, timeout_base_ms <= 3); + xe_tile_assert(tile, timeout_base_ms <= 3); - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); - ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + ret = pcode_try_request(tile, mbox, request, reply_mask, reply, &status, false, timeout_base_ms * 1000, true); if (!ret) goto out; @@ -191,20 +191,20 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, * requests, and for any quirks of the PCODE firmware that delays * the request completion. */ - drm_err(>_to_xe(gt)->drm, + drm_err(&tile_to_xe(tile)->drm, "PCODE timeout, retrying with preemption disabled\n"); preempt_disable(); - ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + ret = pcode_try_request(tile, mbox, request, reply_mask, reply, &status, true, 50 * 1000, true); preempt_enable(); out: - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); return status ? status : ret; } /** * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table - * @gt: gt instance + * @tile: tile instance * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz. * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz. * @@ -227,30 +227,30 @@ out: * - -EACCES, "PCODE Rejected" * - -EPROTO, "Unknown" */ -int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, +int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, u32 max_gt_freq) { int ret; u32 freq; - if (!gt_to_xe(gt)->info.has_llc) + if (!tile_to_xe(tile)->info.has_llc) return 0; if (max_gt_freq <= min_gt_freq) return -EINVAL; - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); for (freq = min_gt_freq; freq <= max_gt_freq; freq++) { u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq; - ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE, + ret = pcode_mailbox_rw(tile, PCODE_WRITE_MIN_FREQ_TABLE, &data, NULL, 1, false, false); if (ret) goto unlock; } unlock: - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); return ret; } @@ -270,7 +270,7 @@ unlock: int xe_pcode_ready(struct xe_device *xe, bool locked) { u32 status, request = DGFX_GET_INIT_STATUS; - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_tile *tile = xe_device_get_root_tile(xe); int timeout_us = 180000000; /* 3 min */ int ret; @@ -281,15 +281,15 @@ int xe_pcode_ready(struct xe_device *xe, bool locked) return 0; if (locked) - mutex_lock(>->pcode.lock); + mutex_lock(&tile->pcode.lock); - ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request, + ret = pcode_try_request(tile, DGFX_PCODE_STATUS, request, DGFX_INIT_STATUS_COMPLETE, DGFX_INIT_STATUS_COMPLETE, &status, false, timeout_us, locked); if (locked) - mutex_unlock(>->pcode.lock); + mutex_unlock(&tile->pcode.lock); if (ret) drm_err(&xe->drm, @@ -300,14 +300,14 @@ int xe_pcode_ready(struct xe_device *xe, bool locked) /** * xe_pcode_init: initialize components of PCODE - * @gt: gt instance + * @tile: tile instance * * This function initializes the xe_pcode component. * To be called once only during probe. */ -void xe_pcode_init(struct xe_gt *gt) +void xe_pcode_init(struct xe_tile *tile) { - drmm_mutex_init(>_to_xe(gt)->drm, >->pcode.lock); + drmm_mutex_init(&tile_to_xe(tile)->drm, &tile->pcode.lock); } /** diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index 3f54c6d2a57d..ba33991d72a7 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -7,21 +7,21 @@ #define _XE_PCODE_H_ #include -struct xe_gt; +struct xe_tile; struct xe_device; -void xe_pcode_init(struct xe_gt *gt); +void xe_pcode_init(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); int xe_pcode_ready(struct xe_device *xe, bool locked); -int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, +int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, u32 max_gt_freq); -int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); -int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val, +int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1); +int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 val, int timeout_ms); -#define xe_pcode_write(gt, mbox, val) \ - xe_pcode_write_timeout(gt, mbox, val, 1) +#define xe_pcode_write(tile, mbox, val) \ + xe_pcode_write_timeout(tile, mbox, val, 1) -int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, +int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_ms); #define PCODE_MBOX(mbcmd, param1, param2)\ diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 15ea0a942f67..dda5268507d8 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -9,6 +9,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_migrate.h" +#include "xe_pcode.h" #include "xe_sa.h" #include "xe_tile.h" #include "xe_tile_sysfs.h" @@ -124,6 +125,8 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) if (IS_ERR(tile->primary_gt)) return PTR_ERR(tile->primary_gt); + xe_pcode_init(tile); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index 99ff95e408e0..b26e26d73dae 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -34,7 +34,6 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - struct xe_gt *gt = tile->primary_gt; u32 val, mbox; int err; @@ -42,7 +41,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_P0) | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); - err = xe_pcode_read(gt, mbox, &val, NULL); + err = xe_pcode_read(tile, mbox, &val, NULL); if (err) return err; @@ -57,7 +56,6 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - struct xe_gt *gt = tile->primary_gt; u32 val, mbox; int err; @@ -65,7 +63,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_PN) | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); - err = xe_pcode_read(gt, mbox, &val, NULL); + err = xe_pcode_read(tile, mbox, &val, NULL); if (err) return err; -- 2.51.0 From 20f61c1ead989e5d16f35bd0a546c6233b703b69 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Aug 2024 15:06:22 -0700 Subject: [PATCH 02/16] drm/xe/hwmon: Treat hwmon as a per-device concept There's only one instance of hwmon per device, and MMIO access to it is always done through the root tile. The code has been passing around a pointer to the root tile's primary GT, which is confusing since this isn't really a GT-level concept. Replace that pointer with an xe_device pointer and use xe_root_mmio_gt(xe) to get a pointer when we need to do register MMIO. This makes things easier to follow, and also cleans up the code in preparation for a much larger MMIO register access overhaul that's coming soon. Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240829220619.789159-6-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_hwmon.c | 95 +++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 98e3ec08279e..aa11728e7e79 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -12,7 +12,6 @@ #include "regs/xe_mchbar_regs.h" #include "regs/xe_pcode_regs.h" #include "xe_device.h" -#include "xe_gt.h" #include "xe_hwmon.h" #include "xe_mmio.h" #include "xe_pcode.h" @@ -65,8 +64,8 @@ struct xe_hwmon_energy_info { struct xe_hwmon { /** @hwmon_dev: hwmon device for xe */ struct device *hwmon_dev; - /** @gt: primary gt */ - struct xe_gt *gt; + /** @xe: Xe device */ + struct xe_device *xe; /** @hwmon_lock: lock for rw attributes*/ struct mutex hwmon_lock; /** @scl_shift_power: pkg power unit */ @@ -82,7 +81,7 @@ struct xe_hwmon { static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, int channel) { - struct xe_device *xe = gt_to_xe(hwmon->gt); + struct xe_device *xe = hwmon->xe; switch (hwmon_reg) { case REG_PKG_RAPL_LIMIT: @@ -148,8 +147,9 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val, min, max; - struct xe_device *xe = gt_to_xe(hwmon->gt); + struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; + struct xe_gt *mmio = xe_root_mmio_gt(xe); rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); @@ -166,7 +166,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v mutex_lock(&hwmon->hwmon_lock); - reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); + reg_val = xe_mmio_read32(mmio, rapl_limit); /* Check if PL1 limit is disabled */ if (!(reg_val & PKG_PWR_LIM_1_EN)) { *value = PL1_DISABLE; @@ -176,7 +176,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - reg_val = xe_mmio_read64_2x32(hwmon->gt, pkg_power_sku); + reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); @@ -190,6 +190,7 @@ unlock: static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) { + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); int ret = 0; u64 reg_val; struct xe_reg rapl_limit; @@ -200,10 +201,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ if (value == PL1_DISABLE) { - reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); - reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0); + reg_val = xe_mmio_read32(mmio, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { - drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); + drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; } goto unlock; @@ -212,7 +213,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); - reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); @@ -221,6 +222,7 @@ unlock: static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) { + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); u64 reg_val; @@ -229,7 +231,7 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, l * for this register can be skipped. * See xe_hwmon_power_is_visible. */ - reg_val = xe_mmio_read32(hwmon->gt, reg); + reg_val = xe_mmio_read32(mmio, reg); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); } @@ -257,11 +259,12 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, l static void xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) { + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; u64 reg_val; - reg_val = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)); + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)); if (reg_val >= ei->reg_val_prev) ei->accum_energy += reg_val - ei->reg_val_prev; @@ -279,19 +282,20 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at char *buf) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; int sensor_index = to_sensor_dev_attr(attr)->index; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); + r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); mutex_unlock(&hwmon->hwmon_lock); - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); @@ -319,6 +323,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a const char *buf, size_t count) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); u32 x, y, rxy, x_w = 2; /* 2 bits */ u64 tau4, r, max_win; unsigned long val; @@ -371,16 +376,16 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_rmw32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), + r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), PKG_PWR_LIM_1_TIME, rxy); mutex_unlock(&hwmon->hwmon_lock); - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return count; } @@ -406,11 +411,11 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return ret; } @@ -435,20 +440,24 @@ static const struct hwmon_channel_info * const hwmon_info[] = { }; /* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ -static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) +static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval) { + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + /* Avoid Illegal Subcommand error */ - if (gt_to_xe(gt)->info.platform == XE_DG2) + if (hwmon->xe->info.platform == XE_DG2) return -ENXIO; - return xe_pcode_read(gt_to_tile(gt), PCODE_MBOX(PCODE_POWER_SETUP, + return xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0), uval, NULL); } -static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) +static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) { - return xe_pcode_write(gt_to_tile(gt), PCODE_MBOX(PCODE_POWER_SETUP, + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + + return xe_pcode_write(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), (uval & POWER_SETUP_I1_DATA_MASK)); } @@ -461,7 +470,7 @@ static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, mutex_lock(&hwmon->hwmon_lock); - ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); + ret = xe_hwmon_pcode_read_i1(hwmon, &uval); if (ret) goto unlock; @@ -481,7 +490,7 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, mutex_lock(&hwmon->hwmon_lock); uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); - ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval); + ret = xe_hwmon_pcode_write_i1(hwmon, uval); mutex_unlock(&hwmon->hwmon_lock); return ret; @@ -489,9 +498,10 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value) { + struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); u64 reg_val; - reg_val = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); /* HW register value in units of 2.5 millivolt */ *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); } @@ -510,7 +520,7 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) channel)) ? 0444 : 0; case hwmon_power_crit: if (channel == CHANNEL_PKG) - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + return (xe_hwmon_pcode_read_i1(hwmon, &uval) || !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; break; case hwmon_power_label: @@ -563,10 +573,10 @@ xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) switch (attr) { case hwmon_curr_crit: - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + return (xe_hwmon_pcode_read_i1(hwmon, &uval) || (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; case hwmon_curr_label: - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + return (xe_hwmon_pcode_read_i1(hwmon, &uval) || (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0444; break; default: @@ -654,7 +664,7 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; int ret; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); switch (type) { case hwmon_power: @@ -674,7 +684,7 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, break; } - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return ret; } @@ -686,7 +696,7 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); switch (type) { case hwmon_power: @@ -706,7 +716,7 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; } - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return ret; } @@ -718,7 +728,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_pm_runtime_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(hwmon->xe); switch (type) { case hwmon_power: @@ -732,7 +742,7 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; } - xe_pm_runtime_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(hwmon->xe); return ret; } @@ -771,6 +781,7 @@ static const struct hwmon_chip_info hwmon_chip_info = { static void xe_hwmon_get_preregistration_info(struct xe_device *xe) { + struct xe_gt *mmio = xe_root_mmio_gt(xe); struct xe_hwmon *hwmon = xe->hwmon; long energy; u64 val_sku_unit = 0; @@ -783,7 +794,7 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) */ pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); if (xe_reg_is_valid(pkg_power_sku_unit)) { - val_sku_unit = xe_mmio_read32(hwmon->gt, pkg_power_sku_unit); + val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); @@ -828,8 +839,8 @@ void xe_hwmon_register(struct xe_device *xe) if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) return; - /* primary GT to access device level properties */ - hwmon->gt = xe->tiles[0].primary_gt; + /* There's only one instance of hwmon per device */ + hwmon->xe = xe; xe_hwmon_get_preregistration_info(xe); -- 2.51.0 From ddc94d0b17e8ea8179ecbbefacac3fba0fb77265 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Aug 2024 10:01:43 -0700 Subject: [PATCH 03/16] dma-buf: Split out dma fence array create into alloc and arm functions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Useful to preallocate dma fence array and then arm in path of reclaim or a dma fence. v2: - s/arm/init (Christian) - Drop !array warn (Christian) v3: - Fix kernel doc typos (dim) Cc: Sumit Semwal Cc: Christian König Signed-off-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240826170144.2492062-2-matthew.brost@intel.com --- drivers/dma-buf/dma-fence-array.c | 78 ++++++++++++++++++++++--------- include/linux/dma-fence-array.h | 6 +++ 2 files changed, 63 insertions(+), 21 deletions(-) diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index c74ac197d5fe..8a08ffde31e7 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -144,37 +144,38 @@ const struct dma_fence_ops dma_fence_array_ops = { EXPORT_SYMBOL(dma_fence_array_ops); /** - * dma_fence_array_create - Create a custom fence array + * dma_fence_array_alloc - Allocate a custom fence array + * @num_fences: [in] number of fences to add in the array + * + * Return dma fence array on success, NULL on failure + */ +struct dma_fence_array *dma_fence_array_alloc(int num_fences) +{ + struct dma_fence_array *array; + + return kzalloc(struct_size(array, callbacks, num_fences), GFP_KERNEL); +} +EXPORT_SYMBOL(dma_fence_array_alloc); + +/** + * dma_fence_array_init - Init a custom fence array + * @array: [in] dma fence array to arm * @num_fences: [in] number of fences to add in the array * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use * @signal_on_any: [in] signal on any fence in the array * - * Allocate a dma_fence_array object and initialize the base fence with - * dma_fence_init(). - * In case of error it returns NULL. - * - * The caller should allocate the fences array with num_fences size - * and fill it with the fences it wants to add to the object. Ownership of this - * array is taken and dma_fence_put() is used on each fence on release. - * - * If @signal_on_any is true the fence array signals if any fence in the array - * signals, otherwise it signals when all fences in the array signal. + * Implementation of @dma_fence_array_create without allocation. Useful to init + * a preallocated dma fence array in the path of reclaim or dma fence signaling. */ -struct dma_fence_array *dma_fence_array_create(int num_fences, - struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) +void dma_fence_array_init(struct dma_fence_array *array, + int num_fences, struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any) { - struct dma_fence_array *array; - WARN_ON(!num_fences || !fences); - array = kzalloc(struct_size(array, callbacks, num_fences), GFP_KERNEL); - if (!array) - return NULL; - array->num_fences = num_fences; spin_lock_init(&array->lock); @@ -200,6 +201,41 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, */ while (num_fences--) WARN_ON(dma_fence_is_container(fences[num_fences])); +} +EXPORT_SYMBOL(dma_fence_array_init); + +/** + * dma_fence_array_create - Create a custom fence array + * @num_fences: [in] number of fences to add in the array + * @fences: [in] array containing the fences + * @context: [in] fence context to use + * @seqno: [in] sequence number to use + * @signal_on_any: [in] signal on any fence in the array + * + * Allocate a dma_fence_array object and initialize the base fence with + * dma_fence_init(). + * In case of error it returns NULL. + * + * The caller should allocate the fences array with num_fences size + * and fill it with the fences it wants to add to the object. Ownership of this + * array is taken and dma_fence_put() is used on each fence on release. + * + * If @signal_on_any is true the fence array signals if any fence in the array + * signals, otherwise it signals when all fences in the array signal. + */ +struct dma_fence_array *dma_fence_array_create(int num_fences, + struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any) +{ + struct dma_fence_array *array; + + array = dma_fence_array_alloc(num_fences); + if (!array) + return NULL; + + dma_fence_array_init(array, num_fences, fences, + context, seqno, signal_on_any); return array; } diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 29c5650c1038..079b3dec0a16 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -79,6 +79,12 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) +struct dma_fence_array *dma_fence_array_alloc(int num_fences); +void dma_fence_array_init(struct dma_fence_array *array, + int num_fences, struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any); + struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno, -- 2.51.0 From bf758226c7e6927f1c2c34fd6eb2cb580b77f543 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Aug 2024 10:01:44 -0700 Subject: [PATCH 04/16] drm/xe: Invalidate media_gt TLBs in PT code MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Testing on LNL has shown media GT's TLBs need to be invalidated via the GuC, update PT code appropriately. v2: - Do dma_fence_get before first call of invalidation_fence_init (Himal) - No need to check for valid chain fence (Himal) v3: - Use dma-fence-array Fixes: 3330361543fc ("drm/xe/lnl: Add LNL platform definition") Signed-off-by: Matthew Brost Acked-by: Christian König Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240826170144.2492062-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 117 ++++++++++++++++++++++++++++++------- 1 file changed, 96 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 579ed31b46db..d6353e8969f0 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include + #include "xe_pt.h" #include "regs/xe_gtt_defs.h" @@ -1627,9 +1629,11 @@ xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) { + int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count); + xe->info.tile_count << shift); return 0; } @@ -1816,6 +1820,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; struct xe_vma_op *op; + int shift = tile->media_gt ? 1 : 0; int err; lockdep_assert_held(&vops->vm->lock); @@ -1824,7 +1829,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) xe_pt_update_ops_init(pt_update_ops); err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count); + tile_to_xe(tile)->info.tile_count << shift); if (err) return err; @@ -1849,13 +1854,20 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) + struct xe_vma *vma, struct dma_fence *fence, + struct dma_fence *fence2) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); + if (fence2) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + } vma->tile_present |= BIT(tile->id); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { @@ -1875,13 +1887,20 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) + struct xe_vma *vma, struct dma_fence *fence, + struct dma_fence *fence2) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); + if (fence2) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + } vma->tile_present &= ~BIT(tile->id); if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); @@ -1898,7 +1917,8 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence) + struct xe_vma_op *op, struct dma_fence *fence, + struct dma_fence *fence2) { xe_vm_assert_held(vm); @@ -1907,26 +1927,28 @@ static void op_commit(struct xe_vm *vm, if (!op->map.immediate && xe_vm_in_fault_mode(vm)) break; - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence); + bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, + fence2); break; case DRM_GPUVA_OP_REMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence); + gpuva_to_vma(op->base.remap.unmap->va), fence, + fence2); if (op->remap.prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence); + fence, fence2); if (op->remap.next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence); + fence, fence2); break; case DRM_GPUVA_OP_UNMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence); + gpuva_to_vma(op->base.unmap.va), fence, fence2); break; case DRM_GPUVA_OP_PREFETCH: bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence); + gpuva_to_vma(op->base.prefetch.va), fence, fence2); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); @@ -1963,7 +1985,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; + struct invalidation_fence *ifence = NULL, *mfence = NULL; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; struct xe_range_fence *rfence; struct xe_vma_op *op; int err = 0, i; @@ -1996,6 +2020,23 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) err = -ENOMEM; goto kill_vm_tile1; } + if (tile->media_gt) { + mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!mfence) { + err = -ENOMEM; + goto free_ifence; + } + fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); + if (!fences) { + err = -ENOMEM; + goto free_ifence; + } + cf = dma_fence_array_alloc(2); + if (!cf) { + err = -ENOMEM; + goto free_ifence; + } + } } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); @@ -2027,19 +2068,50 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) /* tlb invalidation must be done before signaling rebind */ if (ifence) { + if (mfence) + dma_fence_get(fence); invalidation_fence_init(tile->primary_gt, ifence, fence, pt_update_ops->start, pt_update_ops->last, vm->usm.asid); - fence = &ifence->base.base; + if (mfence) { + invalidation_fence_init(tile->media_gt, mfence, fence, + pt_update_ops->start, + pt_update_ops->last, vm->usm.asid); + fences[0] = &ifence->base.base; + fences[1] = &mfence->base.base; + dma_fence_array_init(cf, 2, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + fence = &cf->base; + } else { + fence = &ifence->base.base; + } } - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); + if (!mfence) { + dma_resv_add_fence(xe_vm_resv(vm), fence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence); + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); + } else { + dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, + &ifence->base.base, &mfence->base.base); + } if (pt_update_ops->needs_userptr_lock) up_read(&vm->userptr.notifier_lock); @@ -2049,6 +2121,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) free_rfence: kfree(rfence); free_ifence: + kfree(cf); + kfree(fences); + kfree(mfence); kfree(ifence); kill_vm_tile1: if (err != -EAGAIN && tile->id) -- 2.51.0 From a1498ab229ca16dfc35331dfb309f8e28b84bb5d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 30 Aug 2024 15:20:59 +0200 Subject: [PATCH 05/16] drm/xe/pf: Add thresholds to the VF KLV config MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We are pushing threshold KLV to the GuC immediately during the threshold provisioning, but those configs will be lost during a GT reset. Include threshold KLVs while encoding full VF config buffer to make sure the GuC receives all of the config KLVs. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240830132100.1704-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 497af3949b19..151eb69ab4ae 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -277,6 +277,14 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config) cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT); cfg[n++] = config->preempt_timeout; +#define encode_threshold_config(TAG, ...) ({ \ + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG); \ + cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]; \ +}); + + MAKE_XE_GUC_KLV_THRESHOLDS_SET(encode_threshold_config); +#undef encode_threshold_config + return n; } -- 2.51.0 From da6ec743397702e0b551bcacfeaa48829d303a29 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 30 Aug 2024 15:21:00 +0200 Subject: [PATCH 06/16] drm/xe/pf: Reset thresholds when releasing a VF config MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit As part of the VF config release, we should reset all parameters, including thresholds, to always start with the clean VF config. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240830132100.1704-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 151eb69ab4ae..a95e546b7744 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1842,6 +1842,18 @@ u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, return value; } +static void pf_reset_config_thresholds(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + +#define reset_threshold_config(TAG, ...) ({ \ + config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)] = 0; \ +}); + + MAKE_XE_GUC_KLV_THRESHOLDS_SET(reset_threshold_config); +#undef reset_threshold_config +} + static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); @@ -1857,6 +1869,7 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) pf_release_config_ctxs(gt, config); pf_release_config_dbs(gt, config); pf_reset_config_sched(gt, config); + pf_reset_config_thresholds(gt, config); } /** -- 2.51.0 From 9f6b47907e2d01bfa90c3003e42b6dfeefd8d03a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 2 Sep 2024 21:07:26 +0200 Subject: [PATCH 07/16] drm/xe: Remove redundant [drm] tag from xe_assert() message Since commit 178c0a33c421 ("drm/print: Add generic drm dev printk function") the output from drm_WARN() includes previously missing the [drm] tag, so now xe_assert() is printing it twice: [ ] xe 0000:00:02.0: [drm] [drm] Assertion `false` failed! Signed-off-by: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240902190726.1748-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_assert.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 8b0cc1bc9327..e22bbf57fca7 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -81,7 +81,7 @@ #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define __xe_assert_msg(xe, condition, msg, arg...) ({ \ - (void)drm_WARN(&(xe)->drm, !(condition), "[" DRM_NAME "] Assertion `%s` failed!\n" msg, \ + (void)drm_WARN(&(xe)->drm, !(condition), "Assertion `%s` failed!\n" msg, \ __stringify(condition), ## arg); \ }) #else -- 2.51.0 From bc947d9a8c3ebd207e52c0e35cfc88f3e1abe54f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 30 Aug 2024 14:35:06 -0400 Subject: [PATCH 08/16] drm/xe: Add missing runtime reference to wedged upon gt_reset Fixes this missed case: xe 0000:00:02.0: [drm] Missing outer runtime PM protection WARNING: CPU: 99 PID: 1455 at drivers/gpu/drm/xe/xe_pm.c:564 xe_pm_runtime_get_noresume+0x48/0x60 [xe] Call Trace: ? show_regs+0x67/0x70 ? __warn+0x94/0x1b0 ? xe_pm_runtime_get_noresume+0x48/0x60 [xe] ? report_bug+0x1b7/0x1d0 ? handle_bug+0x46/0x80 ? exc_invalid_op+0x19/0x70 ? asm_exc_invalid_op+0x1b/0x20 ? xe_pm_runtime_get_noresume+0x48/0x60 [xe] xe_device_declare_wedged+0x91/0x280 [xe] gt_reset_worker+0xa2/0x250 [xe] v2: Also move get and get the right Fixes tag (Himal, Brost) Fixes: fb74b205cdd2 ("drm/xe: Introduce a simple wedged state") Cc: Himal Prasad Ghimiray Cc: Matthew Brost Reviewed-by: Jonathan Cavitt Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240830183507.298351-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index ffacddb587b2..f0dc2bf24c7b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -752,12 +752,13 @@ static int gt_reset(struct xe_gt *gt) xe_gt_info(gt, "reset started\n"); + xe_pm_runtime_get(gt_to_xe(gt)); + if (xe_fault_inject_gt_reset()) { err = -ECANCELED; goto err_fail; } - xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_sanitize(gt); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); @@ -792,11 +793,11 @@ err_out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: XE_WARN_ON(xe_uc_start(>->uc)); - xe_pm_runtime_put(gt_to_xe(gt)); err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(gt_to_xe(gt)); + xe_pm_runtime_put(gt_to_xe(gt)); return err; } -- 2.51.0 From 8da19441d0a02b53e362df81843bb20db3a8006a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 30 Aug 2024 14:35:07 -0400 Subject: [PATCH 09/16] drm/xe/display: Avoid encoder_suspend at runtime suspend Fix circular locking dependency on runtime suspend. <4> [74.952215] ====================================================== <4> [74.952217] WARNING: possible circular locking dependency detected <4> [74.952219] 6.10.0-rc7-xe #1 Not tainted <4> [74.952221] ------------------------------------------------------ <4> [74.952223] kworker/7:1/82 is trying to acquire lock: <4> [74.952226] ffff888120548488 (&dev->mode_config.mutex){+.+.}-{3:3}, at: drm_modeset_lock_all+0x40/0x1e0 [drm] <4> [74.952260] but task is already holding lock: <4> [74.952262] ffffffffa0ae59c0 (xe_pm_runtime_lockdep_map){+.+.}-{0:0}, at: xe_pm_runtime_suspend+0x2f/0x340 [xe] <4> [74.952322] which lock already depends on the new lock. The commit 'b1d90a86 ("drm/xe: Use the encoder suspend helper also used by the i915 driver")' didn't do anything wrong. It actually fixed a critical bug, because the encoder_suspend was never getting actually called because it was returning if (has_display(xe)) instead of if (!has_display(xe)). However, this ended up introducing the encoder suspend calls in the runtime routines as well, causing the circular locking dependency. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2304 Fixes: b1d90a862c89 ("drm/xe: Use the encoder suspend helper also used by the i915 driver") Cc: Imre Deak Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240830183507.298351-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index ae92012253f8..75736faf2a80 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -345,10 +345,10 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_hpd_cancel_work(xe); - if (!runtime && has_display(xe)) + if (!runtime && has_display(xe)) { intel_display_driver_suspend_access(xe); - - intel_encoder_suspend_all(&xe->display); + intel_encoder_suspend_all(&xe->display); + } intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); -- 2.51.0 From 34bb7b813ab398106f700b0a6b218509bb0b904c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 3 Sep 2024 11:42:32 +0200 Subject: [PATCH 10/16] drm/xe: Use xe_pm_runtime_get in xe_bo_move() if reclaim-safe. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit xe_bo_move() might be called in the TTM swapout path from validation by another TTM device. If so, we are not likely to have a RPM reference. So iff xe_pm_runtime_get() is safe to call from reclaim, use it instead of xe_pm_runtime_get_noresume(). Strictly this is currently needed only if handle_system_ccs is true, but use xe_pm_runtime_get() if possible anyway to increase test coverage. At the same time warn if handle_system_ccs is true and we can't call xe_pm_runtime_get() from reclaim context. This will likely trip if someone tries to enable SRIOV on LNL, without fixing Xe SRIOV runtime resume / suspend. Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Matthew Auld Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240903094232.166342-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 11 ++++++++++- drivers/gpu/drm/xe/xe_pm.c | 9 ++++++++- drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1c18ba9bd099..0c64d3b3155e 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -758,7 +758,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_assert(xe, migrate); trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); - xe_pm_runtime_get_noresume(xe); + if (xe_rpm_reclaim_safe(xe)) { + /* + * We might be called through swapout in the validation path of + * another TTM device, so unconditionally acquire rpm here. + */ + xe_pm_runtime_get(xe); + } else { + drm_WARN_ON(&xe->drm, handle_system_ccs); + xe_pm_runtime_get_noresume(xe); + } if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { /* diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 2600c936527e..e518557e0eec 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -79,7 +79,14 @@ static struct lockdep_map xe_pm_runtime_nod3cold_map = { }; #endif -static bool __maybe_unused xe_rpm_reclaim_safe(const struct xe_device *xe) +/** + * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context + * @xe: The xe device. + * + * Return: true if it is safe to runtime resume from reclaim context. + * false otherwise. + */ +bool xe_rpm_reclaim_safe(const struct xe_device *xe) { return !xe->d3cold.capable && !xe->info.has_sriov; } diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 9aef673b1c8a..998d1ed64556 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -31,6 +31,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); +bool xe_rpm_reclaim_safe(const struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); int xe_pm_module_init(void); -- 2.51.0 From 13a48a0fa52352f9fe58e2e1927670dcfea64c3a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 2 Sep 2024 21:29:53 +0200 Subject: [PATCH 11/16] drm/xe/pf: Sanitize VF scratch registers on FLR MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Some VF accessible registers (like GuC scratch registers) must be explicitly reset during the FLR. While this is today done by the GuC firmware, according to the design, this should be responsibility of the PF driver, as future platforms may require more registers to be reset. Likewise GuC, the PF can access VFs registers by adding some platform specific offset to the original register address. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240902192953.1792-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 52 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 3 +- 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 905f409db74b..919d960165d5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -5,8 +5,10 @@ #include +#include "regs/xe_guc_regs.h" #include "regs/xe_regs.h" +#include "xe_gt.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" @@ -89,6 +91,56 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) xe_gt_sriov_pf_service_update(gt); } +static u32 pf_get_vf_regs_stride(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000; +} + +static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride) +{ + struct xe_reg pf_reg = vf_reg; + + pf_reg.vf = 0; + pf_reg.addr += stride * vfid; + + return pf_reg; +} + +static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid) +{ + u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt)); + struct xe_reg scratch; + int n, count; + + if (xe_gt_is_media_type(gt)) { + count = MED_VF_SW_FLAG_COUNT; + for (n = 0; n < count; n++) { + scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride); + xe_mmio_write32(gt, scratch, 0); + } + } else { + count = VF_SW_FLAG_COUNT; + for (n = 0; n < count; n++) { + scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride); + xe_mmio_write32(gt, scratch, 0); + } + } +} + +/** + * xe_gt_sriov_pf_sanitize_hw() - Reset hardware state related to a VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + pf_clear_vf_scratch_regs(gt, vfid); +} + /** * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset. * @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f0cb726a6919..96fab779a906 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -11,6 +11,7 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); +void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 02f7328bd6ce..b4fd5a81aff1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -9,6 +9,7 @@ #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" @@ -1008,7 +1009,7 @@ static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) return false; - /* XXX: placeholder */ + xe_gt_sriov_pf_sanitize_hw(gt, vfid); pf_enter_vf_flr_send_finish(gt, vfid); return true; -- 2.51.0 From cd89de14bbacce1fc060fdfab75bacf95b1c5d40 Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Fri, 23 Aug 2024 13:36:43 +0530 Subject: [PATCH 12/16] drm/xe: Replace double space with single space after comma Avoid using double space, ", " in function or macro parameters where it's not required by any alignment purpose. Replace it with a single space, ", ". Signed-off-by: Nitin Gote Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240823080643.2461992-1-nitin.r.gote@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 2 +- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++-- drivers/gpu/drm/xe/xe_irq.c | 4 ++-- drivers/gpu/drm/xe/xe_trace_bo.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 23f7dc5bbe99..51fd40ffafcb 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -128,7 +128,7 @@ struct xe_reg_mcr { * options. */ #define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \ - .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ + .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ }) static inline bool xe_reg_is_valid(struct xe_reg r) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 52df28032a6f..c67d4807f37d 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -985,7 +985,7 @@ timeout: BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS); BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1); - ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask, + ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask, 1000000, &header, false); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index f24dd5223926..4b95f75b1546 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -182,7 +182,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); - INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); + INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); @@ -852,7 +852,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) #define ct_alive(ct) \ (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \ !ct->ctbs.g2h.info.broken) - if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) + if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) return false; #undef ct_alive diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5f2c368c35ad..14c3a476597a 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -173,7 +173,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) if (ccs_mask & (BIT(0)|BIT(1))) xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); + xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); } if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { @@ -504,7 +504,7 @@ static void gt_irq_reset(struct xe_tile *tile) if (ccs_mask & (BIT(0)|BIT(1))) xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); if ((tile->media_gt && xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) || diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index 9b1a1d4304ae..30a3cfbaaa09 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -189,7 +189,7 @@ DECLARE_EVENT_CLASS(xe_vm, ), TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev), - __entry->vm, __entry->asid) + __entry->vm, __entry->asid) ); DEFINE_EVENT(xe_vm, xe_vm_kill, -- 2.51.0 From 89076b5a8b4e0a01040585e156a0b014cd472fd3 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 28 Aug 2024 11:43:42 +0100 Subject: [PATCH 13/16] drm/xe: prevent potential UAF in pf_provision_vf_ggtt() The node ptr can point to an already freed ptr, if we hit the path with an already allocated node. We later dereference that pointer with: xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); which is a potential UAF. Fix this by not stashing the ptr for node. Also since it is likely a bad idea to leave config->ggtt_region pointing to a stale ptr, also set that to NULL by calling pf_release_vf_config_ggtt() instead of pf_release_ggtt(). Fixes: 34e804220f69 ("drm/xe: Make xe_ggtt_node struct independent") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240828104341.180111-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index a95e546b7744..8250ef71e685 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -399,7 +399,7 @@ static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_confi static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct xe_ggtt_node *node = config->ggtt_region; + struct xe_ggtt_node *node; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -411,14 +411,14 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (xe_ggtt_node_allocated(node)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; - pf_release_ggtt(tile, node); + pf_release_vf_config_ggtt(gt, config); } - xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); + xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); if (!size) return 0; -- 2.51.0 From 1ff14648dc58b3de39b9d241607a41c833110d90 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 28 Aug 2024 10:22:58 +0100 Subject: [PATCH 14/16] drm/xe/pat: sanity check compression and coh_mode MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit There is an implicit assumption in the driver that compression and coh_1way+ are mutually exclusive. If this is ever not true then userptr and imported dma-buf from external device will have uncleared ccs state. Add a build bug for this so we don't forget. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240828092257.169063-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_pat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index f291a1730024..5bf48e99c939 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -100,6 +100,10 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * Reserved entries should be programmed with the maximum caching, minimum * coherency (which matches an all-0's encoding), so we can just omit them * in the table. + * + * Note: There is an implicit assumption in the driver that compression and + * coh_1way+ are mutually exclusive. If this is ever not true then userptr + * and imported dma-buf from external device will have uncleared ccs state. */ #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ { \ @@ -109,7 +113,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ - .coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ + .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \ + XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ } static const struct xe_pat_table_entry xe2_pat_table[] = { -- 2.51.0 From ae304b054520fec0a5ad5dec103a37abb53fef0e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 12:46:47 +0300 Subject: [PATCH 15/16] drm/xe/pciids: add some missing ADL-N PCI IDs Similar to commit 425b463859ed ("drm/i915: Update ADL-N PCI IDs"). Reviewed-by: Sai Teja Pottumuttu Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/47d543393e4026588401a03c4e3ce12ce29780e3.1725443121.git.jani.nikula@intel.com --- include/drm/intel/xe_pciids.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 644872a35c35..4aab1bc2327e 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -97,7 +97,9 @@ #define XE_ADLN_IDS(MACRO__, ...) \ MACRO__(0x46D0, ## __VA_ARGS__), \ MACRO__(0x46D1, ## __VA_ARGS__), \ - MACRO__(0x46D2, ## __VA_ARGS__) + MACRO__(0x46D2, ## __VA_ARGS__), \ + MACRO__(0x46D3, ## __VA_ARGS__), \ + MACRO__(0x46D4, ## __VA_ARGS__) /* RPL-S */ #define XE_RPLS_IDS(MACRO__, ...) \ -- 2.51.0 From d454902a690db47f1880f963514bbf0fc7a129a8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 12:46:48 +0300 Subject: [PATCH 16/16] drm/xe/pciids: separate RPL-U and RPL-P PCI IDs Avoid including PCI IDs for one platform to the PCI IDs of another. It's more clear to deal with them completely separately at the PCI ID macro level. Reviewed-by: Sai Teja Pottumuttu Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/4868d36fbfa8c38ea2d490bca82cf6370b8d65dd.1725443121.git.jani.nikula@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 1 + include/drm/intel/xe_pciids.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 7eb00a87b786..14fe08e40c98 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -383,6 +383,7 @@ static const struct pci_device_id pciidlist[] = { XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc), + XE_RPLU_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 4aab1bc2327e..7a9a7d0a89ca 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -122,7 +122,6 @@ /* RPL-P */ #define XE_RPLP_IDS(MACRO__, ...) \ - XE_RPLU_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0xA720, ## __VA_ARGS__), \ MACRO__(0xA7A0, ## __VA_ARGS__), \ MACRO__(0xA7A8, ## __VA_ARGS__), \ -- 2.51.0