The WA buffer we use to capture context utilization contains GGTT
references. This means its instructions have to be either fixed or
re-emitted during VF post-migration recovery.
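For orientation, a simplified outline of the recovery path that ends up
re-emitting the buffer (error handling omitted; the entry point lives in
xe_sriov_vf.c, see the last hunk below):

  VF post-migration fixups (xe_sriov_vf.c)
    xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf)           /* all queues of a GuC */
      xe_exec_queue_contexts_hwsp_rebase(q, scratch)        /* all LRCs of a queue */
        xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, scratch)  /* re-emit the WA BB */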
This patch re-emits the content of the utilization WA BB during the
recovery.
The way we write to VRAM requires a scratch buffer to be filled first,
before the whole block is memcpy'd to its destination. We are re-using
the scratch buffer introduced in an earlier part of the recovery. This
is not a performance optimization, but a necessity to avoid creating
dependencies between locks.
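In rough outline, the staging pattern looks as follows (a condensed
sketch of the setup_bo()/finish_bo() path changed below;
emit_wa_bb_instructions() is a hypothetical stand-in for the per-buffer
setup callbacks):

  /* fill the preallocated scratch buffer in system memory first... */
  u32 *ptr = scratch;
  written = emit_wa_bb_instructions(ptr);  /* hypothetical helper */
  /* ...then publish it to the io-mapped BO with a single copy */
  xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
                   __xe_lrc_wa_bb_offset(lrc), scratch,
                   written * sizeof(u32));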
v2: Notable rebase after "Prepare WA BB setup for more users" patch
v3: Added error propagation
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Reviewed-by: Michal Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20250802031045.1127138-8-tomasz.lis@intel.com
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
* within all LRCs of a queue.
* @q: the &xe_exec_queue struct instance containing target LRCs
* @scratch: scratch buffer to be used as temporary storage
+ *
+ * Returns: zero on success, negative error code on failure.
*/
-void xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
+int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
{
int i;
+ int err = 0;
for (i = 0; i < q->width; ++i) {
xe_lrc_update_memirq_regs_with_address(q->lrc[i], q->hwe, scratch);
xe_lrc_update_hwctx_regs_with_address(q->lrc[i]);
+ err = xe_lrc_setup_wa_bb_with_scratch(q->lrc[i], q->hwe, scratch);
+ if (err)
+ break;
}
+
+ return err;
}
/**
struct xe_vm *vm);
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
-void xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);
+int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);
void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q);
* exec queues registered to given GuC.
* @guc: the &xe_guc struct instance
* @scratch: scratch buffer to be used as temporary storage
+ *
+ * Returns: zero on success, negative error code on failure.
*/
-void xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
+int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
{
struct xe_exec_queue *q;
unsigned long index;
+ int err = 0;
mutex_lock(&guc->submission_state.lock);
- xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
- xe_exec_queue_contexts_hwsp_rebase(q, scratch);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
+ if (err)
+ break;
+ }
mutex_unlock(&guc->submission_state.lock);
+
+ return err;
}
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type);
-void xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
+int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
#endif
#define LRC_PPHWSP_SIZE SZ_4K
#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
-#define LRC_WA_BB_SIZE SZ_4K
/*
* Layout of the LRC and associated data allocated as
ssize_t remain;
if (state->lrc->bo->vmap.is_iomem) {
- state->buffer = kmalloc(state->max_size, GFP_KERNEL);
if (!state->buffer)
return -ENOMEM;
state->ptr = state->buffer;
} else {
state->ptr = state->lrc->bo->vmap.vaddr + state->offset;
- state->buffer = NULL;
}
remain = state->max_size / sizeof(u32);
return 0;
fail:
- kfree(state->buffer);
return -ENOSPC;
}
xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap,
state->offset, state->buffer,
state->written * sizeof(u32));
- kfree(state->buffer);
}
-static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
+/**
+ * xe_lrc_setup_wa_bb_with_scratch - Execute all WA BB setup callbacks.
+ * @lrc: the &xe_lrc struct instance
+ * @hwe: the &xe_hw_engine struct instance
+ * @scratch: preallocated scratch buffer for temporary storage
+ * Return: 0 on success, negative error code on failure
+ */
+int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch)
{
static const struct bo_setup funcs[] = {
{ .setup = setup_timestamp_wa },
.lrc = lrc,
.hwe = hwe,
.max_size = LRC_WA_BB_SIZE,
+ .buffer = scratch,
.reserve_dw = 1,
.offset = __xe_lrc_wa_bb_offset(lrc),
.funcs = funcs,
return 0;
}
+static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
+{
+ u32 *buf = NULL;
+ int ret;
+
+ if (lrc->bo->vmap.is_iomem)
+ buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL);
+
+ ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf);
+
+ kfree(buf);
+
+ return ret;
+}
+
static int
setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
.lrc = lrc,
.hwe = hwe,
.max_size = (63 * 64) /* max 63 cachelines */,
+ .buffer = NULL,
.offset = __xe_lrc_indirect_ctx_offset(lrc),
};
int ret;
if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
return 0;
+ if (lrc->bo->vmap.is_iomem)
+ state.buffer = kmalloc(state.max_size, GFP_KERNEL);
+
ret = setup_bo(&state);
- if (ret)
+ if (ret) {
+ kfree(state.buffer);
return ret;
+ }
/*
* Align to 64B cacheline so there's no garbage at the end for CS to
}
finish_bo(&state);
+ kfree(state.buffer);
xe_lrc_write_ctx_reg(lrc,
CTX_CS_INDIRECT_CTX,
#define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4)
#define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4)
+#define LRC_WA_BB_SIZE SZ_4K
+
#define XE_LRC_CREATE_RUNALONE 0x1
#define XE_LRC_CREATE_PXP 0x2
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
+int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ u32 *scratch);
/**
* xe_lrc_update_timestamp - readout LRC timestamp and update cached value
static size_t post_migration_scratch_size(struct xe_device *xe)
{
- return xe_lrc_reg_size(xe);
+ return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE);
}
/**
return -ENOMEM;
err = xe_gt_sriov_vf_query_config(gt);
- if (err) {
- kfree(buf);
- return err;
- }
+ if (err)
+ goto out;
shift = xe_gt_sriov_vf_ggtt_shift(gt);
if (shift) {
xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift);
xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt);
- xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf);
+ err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf);
+ if (err)
+ goto out;
xe_guc_jobs_ring_rebase(&gt->uc.guc);
xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
}
+out:
kfree(buf);
- return 0;
+ return err;
}
static void vf_post_migration_recovery(struct xe_device *xe)