From 11a64adcdbcc3028b96e440bc33fa76e2e825c10 Mon Sep 17 00:00:00 2001
From: Francois Dugast
Date: Tue, 14 Jan 2025 12:38:53 -0800
Subject: [PATCH 01/16] drm/xe/xe3: Generate and store the L3 bank mask

On Xe3, the register used to indicate which L3 banks are enabled on the
system is a new one called MIRROR_L3BANK_ENABLE. Each bit represents one
bank enabled in each node.

Extend the existing topology code for Xe3 to read this register and
generate the correct L3 bank mask, which can be read by user space
through the topology query.

Bspec: 72573, 73439
Signed-off-by: Francois Dugast
Signed-off-by: Matt Atwood
Reviewed-by: Matt Roper
Link: https://patchwork.freedesktop.org/patch/msgid/20250114203853.35055-1-matthew.s.atwood@intel.com
Signed-off-by: Matt Roper
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++
 drivers/gpu/drm/xe/xe_gt_topology.c | 16 +++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index b4283ac030f4..096859072396 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -221,6 +221,9 @@
 #define MIRROR_FUSE1 XE_REG(0x911c)
+#define MIRROR_L3BANK_ENABLE XE_REG(0x9130)
+#define XE3_L3BANK_ENABLE REG_GENMASK(31, 0)
+
 #define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */
 #define XELP_EU_MASK REG_GENMASK(7, 0)
 #define XELP_GT_SLICE_ENABLE XE_REG(0x9138)
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index df2042db7ee6..516c81e3b8dd 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -129,7 +129,8 @@ static void
 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
 {
 struct xe_device *xe = gt_to_xe(gt);
- u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
+ struct xe_mmio *mmio = &gt->mmio;
+ u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);
 /*
 * PTL platforms with media version 30.00 do not provide proper values
@@ -143,7 +144,16 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
 if (XE_WA(gt, no_media_l3))
 return;
- if (GRAPHICS_VER(xe) >= 20) {
+ if (GRAPHICS_VER(xe) >= 30) {
+ xe_l3_bank_mask_t per_node = {};
+ u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
+ u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
+ u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable);
+
+ bitmap_from_arr32(per_node, &bank_val, 32);
+ gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32,
+ meml3_en);
+ } else if (GRAPHICS_VER(xe) >= 20) {
 xe_l3_bank_mask_t per_node = {};
 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
 u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
@@ -155,7 +165,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
 xe_l3_bank_mask_t per_node = {};
 xe_l3_bank_mask_t per_mask_bit = {};
 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
- u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
+ u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4);
 u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
 bitmap_set_value8(per_mask_bit, 0x3, 0);
--
2.51.0


From 174e9ce0daf6af791386e96e76e743eb59e8a401 Mon Sep 17 00:00:00 2001
From: John Harrison
Date: Mon, 13 Jan 2025 11:44:04 -0800
Subject: [PATCH 02/16] drm/xe/guc: Drop error messages about missing GuC logs

The GuC log snapshot code would complain loudly if there was no GuC log
to take a snapshot of or if the snapshot alloc failed.
Originally, this code was only called on demand when a user (or
developer) explicitly requested a dump of the log. Hence an error
message was useful. However, it is now part of the general devcoredump
file and is called for any GPU hang. Most people don't care about GuC
logs and GPU hangs do not generally mean a kernel/GuC bug. More
importantly, there are valid situations where there is no GuC log,
e.g. SRIOV VFs.

So drop the error messages.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3958
Signed-off-by: John Harrison
Reviewed-by: Rodrigo Vivi
Link: https://patchwork.freedesktop.org/patch/msgid/20250113194405.2033085-1-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/xe/xe_guc_log.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index df4cfb698cdb..80151ff6a71f 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -149,16 +149,12 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log,
 size_t remain;
 int i;
- if (!log->bo) {
- xe_gt_err(gt, "GuC log buffer not allocated\n");
+ if (!log->bo)
 return NULL;
- }
 snapshot = xe_guc_log_snapshot_alloc(log, atomic);
- if (!snapshot) {
- xe_gt_err(gt, "GuC log snapshot not allocated\n");
+ if (!snapshot)
 return NULL;
- }
 remain = snapshot->size;
 for (i = 0; i < snapshot->num_chunks; i++) {
--
2.51.0


From 75d37750a753e7ae079e470ea9699caeae756e3d Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay
Date: Mon, 13 Jan 2025 17:12:01 +0530
Subject: [PATCH 03/16] drm/xe/mmap: Add mmap support for PCI memory barrier

In order to avoid having userspace use MI_MEM_FENCE, we are adding a
mechanism for userspace to generate a PCI memory barrier with low
overhead (avoiding an IOCTL call as well as a write to VRAM, both of
which would add some overhead).

This is implemented by memory-mapping a page as uncached that is backed
by MMIO on the dGPU, thus allowing userspace to do a memory write to the
page without invoking an IOCTL. We select the MMIO so that it is not
accessible from the PCI bus, so the MMIO writes themselves are ignored,
but the PCI memory barrier still takes effect as the MMIO filtering
happens after the memory barrier.

When we detect the specially defined offset in mmap(), we map the 4K
page that contains the last page of the doorbell MMIO range to userspace
for this purpose.
For userspace to query the special offset, we add a special flag to the
mmap_offset ioctl, which needs to be passed as follows:

   struct drm_xe_gem_mmap_offset mmo = {
       .handle = 0, /* this must be 0 */
       .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER,
   };

   igt_ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
   map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset);

IGT : https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/commit/b2dbc6f22815128c0dd5c737504f42e1f1a6ad62
UMD : https://github.com/intel/compute-runtime/pull/772

V7:
 - Dgpu filter added
V6(MAuld):
 - Move physical mmap to fault handler
 - Modify kernel-doc and attach UMD PR when ready
V5(MAuld):
 - Return invalid early in case of non 4K PAGE_SIZE
 - Format kernel-doc and add note for 4K PAGE_SIZE HW limit
V4(MAuld):
 - Add kernel-doc for uapi change
 - Restrict page size to 4K
V3(MAuld):
 - Remove offset definition from UAPI to be able to change later
 - Edit commit message for special flag addition
V2(MAuld):
 - Add fault handler with dummy page to handle unplug device
 - Add Build check for special offset to be below normal start page
 - Test d3hot, mapping seems to be valid in d3hot as well
 - Add more info to commit message

Cc: Matthew Auld
Acked-by: Michal Mrozek
Reviewed-by: Matthew Auld
Signed-off-by: Tejas Upadhyay
Signed-off-by: Matthew Auld
Link: https://patchwork.freedesktop.org/patch/msgid/20250113114201.3178806-1-tejas.upadhyay@intel.com
---
 drivers/gpu/drm/xe/xe_bo.c | 19 +++++-
 drivers/gpu/drm/xe/xe_bo.h | 2 +
 drivers/gpu/drm/xe/xe_device.c | 107 ++++++++++++++++++++++++++++++++-
 include/uapi/drm/xe_drm.h | 29 ++++++++-
 4 files changed, 154 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4f077c11e8e2..78d19fd4670a 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2278,9 +2278,26 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 return -EINVAL;
- if (XE_IOCTL_DBG(xe, args->flags))
+ if (XE_IOCTL_DBG(xe, args->flags &
+ ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
 return -EINVAL;
+ if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
+ if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, args->handle))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
+ return -EINVAL;
+
+ BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
+ SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
+ args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
+ return 0;
+ }
+
 gem_obj = drm_gem_object_lookup(file, args->handle);
 if (XE_IOCTL_DBG(xe, !gem_obj))
 return -ENOENT;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index d9386ab03140..04995c5ced32 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -75,6 +75,8 @@
 #define XE_BO_PROPS_INVALID (-1)
+#define XE_PCI_BARRIER_MMAP_OFFSET (0x50 << XE_PTE_SHIFT)
+
 struct sg_table;
 struct xe_bo *xe_bo_alloc(void);
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5cbc96b214fe..6ecbf7dd396c 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -232,12 +232,117 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo
 #define xe_drm_compat_ioctl NULL
 #endif
+static void barrier_open(struct vm_area_struct *vma)
+{
+ drm_dev_get(vma->vm_private_data);
+}
+
+static void barrier_close(struct vm_area_struct *vma)
+{
+ drm_dev_put(vma->vm_private_data);
+}
+
+static void barrier_release_dummy_page(struct drm_device *dev, void *res)
+{
+ struct page *dummy_page = (struct page *)res;
+
+ __free_page(dummy_page);
+}
+
+static vm_fault_t barrier_fault(struct vm_fault *vmf)
+{
+ struct drm_device *dev = vmf->vma->vm_private_data;
+ struct vm_area_struct *vma = vmf->vma;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
+ pgprot_t prot;
+ int idx;
+
+ prot = vm_get_page_prot(vma->vm_flags);
+
+ if (drm_dev_enter(dev, &idx)) {
+ unsigned long pfn;
+
+#define LAST_DB_PAGE_OFFSET 0x7ff001
+ pfn = PHYS_PFN(pci_resource_start(to_pci_dev(dev->dev), 0) +
+ LAST_DB_PAGE_OFFSET);
+ ret = vmf_insert_pfn_prot(vma, vma->vm_start, pfn,
+ pgprot_noncached(prot));
+ drm_dev_exit(idx);
+ } else {
+ struct page *page;
+
+ /* Allocate new dummy page to map all the VA range in this VMA to it*/
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return VM_FAULT_OOM;
+
+ /* Set the page to be freed using drmm release action */
+ if (drmm_add_action_or_reset(dev, barrier_release_dummy_page, page))
+ return VM_FAULT_OOM;
+
+ ret = vmf_insert_pfn_prot(vma, vma->vm_start, page_to_pfn(page),
+ prot);
+ }
+
+ return ret;
+}
+
+static const struct vm_operations_struct vm_ops_barrier = {
+ .open = barrier_open,
+ .close = barrier_close,
+ .fault = barrier_fault,
+};
+
+static int xe_pci_barrier_mmap(struct file *filp,
+ struct vm_area_struct *vma)
+{
+ struct drm_file *priv = filp->private_data;
+ struct drm_device *dev = priv->minor->dev;
+ struct xe_device *xe = to_xe_device(dev);
+
+ if (!IS_DGFX(xe))
+ return -EINVAL;
+
+ if (vma->vm_end - vma->vm_start > SZ_4K)
+ return -EINVAL;
+
+ if (is_cow_mapping(vma->vm_flags))
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_READ | VM_EXEC))
+ return -EINVAL;
+
+ vm_flags_clear(vma, VM_MAYREAD | VM_MAYEXEC);
+ vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO);
+ vma->vm_ops = &vm_ops_barrier;
+ vma->vm_private_data = dev;
+ drm_dev_get(vma->vm_private_data);
+
+ return 0;
+}
+
+static int xe_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct drm_file *priv = filp->private_data;
+ struct drm_device *dev = priv->minor->dev;
+
+ if (drm_dev_is_unplugged(dev))
+ return -ENODEV;
+
+ switch (vma->vm_pgoff) {
+ case XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT:
+ return xe_pci_barrier_mmap(filp, vma);
+ }
+
+ return drm_gem_mmap(filp, vma);
+}
+
 static const struct file_operations xe_driver_fops = {
 .owner = THIS_MODULE,
 .open = drm_open,
 .release = drm_release_noglobal,
 .unlocked_ioctl = xe_drm_ioctl,
- .mmap = drm_gem_mmap,
+ .mmap = xe_mmap,
 .poll = drm_poll,
 .read = drm_read,
 .compat_ioctl = xe_drm_compat_ioctl,
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index f62689ca861a..cac607a30f6d 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -811,6 +811,32 @@ struct drm_xe_gem_create {
 /**
 * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET
+ *
+ * The @flags can be:
+ * - %DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER - For user to query special offset
+ * for use in mmap ioctl. Writing to the returned mmap address will generate a
+ * PCI memory barrier with low overhead (avoiding IOCTL call as well as writing
+ * to VRAM which would also add overhead), acting like an MI_MEM_FENCE
+ * instruction.
+ *
+ * Note: The mmap size can be at most 4K, due to HW limitations. As a result
+ * this interface is only supported on CPU architectures that support 4K page
+ * size. The mmap_offset ioctl will detect this and gracefully return an
+ * error, where userspace is expected to have a different fallback method for
+ * triggering a barrier.
+ *
+ * Roughly the usage would be as follows:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_gem_mmap_offset mmo = {
+ *         .handle = 0, // must be set to 0
+ *         .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER,
+ *     };
+ *
+ *     err = ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
+ *     map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset);
+ *     map[i] = 0xdeadbeaf; // issue barrier
 */
 struct drm_xe_gem_mmap_offset {
 /** @extensions: Pointer to the first extension struct, if any */
@@ -819,7 +845,8 @@ struct drm_xe_gem_mmap_offset {
 /** @handle: Handle for the object being mapped. */
 __u32 handle;
- /** @flags: Must be zero */
+#define DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER (1 << 0)
+ /** @flags: Flags */
 __u32 flags;
 /** @offset: The fake offset to use for subsequent mmap call */
--
2.51.0


From 758debf35b9cda5450e40996991a6e4b222899bd Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Mon, 13 Jan 2025 16:25:07 -0800
Subject: [PATCH 04/16] drm/xe: Mark ComputeCS read mode as UC on iGPU
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

RING_CMD_CCTL read index should be UC on iGPU parts due to the L3
caching structure. Having this as WB blocks ULLS from being enabled.
Change to UC to unblock ULLS on iGPU.

v2:
 - Drop internal communications comment, bspec is updated

Cc: Balasubramani Vivekanandan
Cc: Michal Mrozek
Cc: Paulo Zanoni
Cc: José Roberto de Souza
Cc: stable@vger.kernel.org
Fixes: 328e089bfb37 ("drm/xe: Leverage ComputeCS read L3 caching")
Signed-off-by: Matthew Brost
Acked-by: Michal Mrozek
Reviewed-by: Stuart Summers
Reviewed-by: Matt Roper
Link: https://patchwork.freedesktop.org/patch/msgid/20250114002507.114087-1-matthew.brost@intel.com
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index ac9c666a9652..fc447751fe78 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -422,7 +422,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 * Bspec: 72161
 */
 const u8 mocs_write_idx = gt->mocs.uc_index;
- const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+ const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
 gt->mocs.wb_index : gt->mocs.uc_index;
 u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
--
2.51.0


From 63060df6f709cbe494f0cfcaa613655862ba479a Mon Sep 17 00:00:00 2001
From: Oak Zeng
Date: Wed, 18 Dec 2024 11:48:31 -0500
Subject: [PATCH 05/16] drm/xe: trace bo create

Add a tracepoint to trace bo creation.
Signed-off-by: Oak Zeng
Reviewed-by: Matthew Brost
Reviewed-by: Himal Prasad Ghimiray
Link: https://patchwork.freedesktop.org/patch/msgid/20241218164833.2364049-2-oak.zeng@intel.com
Signed-off-by: Himal Prasad Ghimiray
---
 drivers/gpu/drm/xe/xe_bo.c | 1 +
 drivers/gpu/drm/xe/xe_trace_bo.h | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 78d19fd4670a..cf0dc9e9c53e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1659,6 +1659,7 @@ __xe_bo_create_locked(struct xe_device *xe,
 }
 }
+ trace_xe_bo_create(bo);
 return bo;
 err_unlock_put_bo:
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index ea50fee50c7d..3d7e6c80b0aa 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -53,6 +53,11 @@ DEFINE_EVENT(xe_bo, xe_bo_validate,
 TP_ARGS(bo)
 );
+DEFINE_EVENT(xe_bo, xe_bo_create,
+ TP_PROTO(struct xe_bo *bo),
+ TP_ARGS(bo)
+);
+
 TRACE_EVENT(xe_bo_move,
 TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement,
 bool move_lacks_source),
--
2.51.0


From 861b27584d9055e4e1763341474ce8ce9dc6a55d Mon Sep 17 00:00:00 2001
From: Oak Zeng
Date: Wed, 18 Dec 2024 11:48:32 -0500
Subject: [PATCH 06/16] drm/xe: Print vm flags in xe_vm trace print

Print vm flags in the xe_vm trace print. This is helpful to diagnose
the VM mode of operation.

Signed-off-by: Oak Zeng
Reviewed-by: Matthew Brost
Reviewed-by: Himal Prasad Ghimiray
Link: https://patchwork.freedesktop.org/patch/msgid/20241218164833.2364049-3-oak.zeng@intel.com
Signed-off-by: Himal Prasad Ghimiray
---
 drivers/gpu/drm/xe/xe_trace_bo.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index 3d7e6c80b0aa..082fadb5f99b 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -190,16 +190,19 @@ DECLARE_EVENT_CLASS(xe_vm,
 __string(dev, __dev_name_vm(vm))
 __field(struct xe_vm *, vm)
 __field(u32, asid)
+ __field(u32, flags)
 ),
 TP_fast_assign(
 __assign_str(dev);
 __entry->vm = vm;
 __entry->asid = vm->usm.asid;
+ __entry->flags = vm->flags;
 ),
- TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev),
- __entry->vm, __entry->asid)
+ TP_printk("dev=%s, vm=%p, asid=0x%05x, vm flags=0x%05x",
+ __get_str(dev), __entry->vm, __entry->asid,
+ __entry->flags)
 );
 DEFINE_EVENT(xe_vm, xe_vm_kill,
--
2.51.0


From 22b1a53f282b1ad6692c6238a7446275854f0afb Mon Sep 17 00:00:00 2001
From: Oak Zeng
Date: Wed, 18 Dec 2024 11:48:33 -0500
Subject: [PATCH 07/16] drm/xe: Print vm parameter in xe_vma trace

Print the vm that the vma belongs to in the vma trace. This is useful
to correlate VMA operations with the VM.
Signed-off-by: Oak Zeng
Reviewed-by: Matthew Brost
Reviewed-by: Himal Prasad Ghimiray
Link: https://patchwork.freedesktop.org/patch/msgid/20241218164833.2364049-4-oak.zeng@intel.com
Signed-off-by: Himal Prasad Ghimiray
---
 drivers/gpu/drm/xe/xe_trace_bo.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index 082fadb5f99b..ccebd5f0878e 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -92,6 +92,7 @@ DECLARE_EVENT_CLASS(xe_vma,
 TP_STRUCT__entry(
 __string(dev, __dev_name_vma(vma))
 __field(struct xe_vma *, vma)
+ __field(struct xe_vm *, vm)
 __field(u32, asid)
 __field(u64, start)
 __field(u64, end)
@@ -101,14 +102,16 @@ DECLARE_EVENT_CLASS(xe_vma,
 TP_fast_assign(
 __assign_str(dev);
 __entry->vma = vma;
+ __entry->vm = xe_vma_vm(vma);
 __entry->asid = xe_vma_vm(vma)->usm.asid;
 __entry->start = xe_vma_start(vma);
 __entry->end = xe_vma_end(vma) - 1;
 __entry->ptr = xe_vma_userptr(vma);
 ),
- TP_printk("dev=%s, vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,",
- __get_str(dev), __entry->vma, __entry->asid, __entry->start,
+ TP_printk("dev=%s, vma=%p, vm=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx",
+ __get_str(dev), __entry->vma, __entry->vm,
+ __entry->asid, __entry->start,
 __entry->end, __entry->ptr)
 )
--
2.51.0


From b824709ee1d0dbfed4b1757279c97fc0edad1e1a Mon Sep 17 00:00:00 2001
From: Oak Zeng
Date: Mon, 13 Jan 2025 16:23:24 -0500
Subject: [PATCH 08/16] drm/xe: Fix a typo in xe_vm_doc.h

s/vm->ttm.base.resv->lock/vm->gpuvm.r_obj->resv->lock

Signed-off-by: Oak Zeng
Reviewed-by: Maciej Patelczyk
Link: https://patchwork.freedesktop.org/patch/msgid/20250113212324.3264218-1-oak.zeng@intel.com
Signed-off-by: Himal Prasad Ghimiray
---
 drivers/gpu/drm/xe/xe_vm_doc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index 078786958403..1030ce214032 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -431,7 +431,7 @@
 * bind path also acquires this lock in write while the exec / compute mode
 * rebind worker acquires this lock in read mode.
 *
- * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
+ * VM dma-resv lock (vm->gpuvm.r_obj->resv->lock) - WW lock. Protects VM dma-resv
 * slots which is shared with any private BO in the VM. Expected to be acquired
 * during VM binds, execs, and compute mode rebind worker. This lock is also
 * held when private BOs are being evicted.
--
2.51.0


From 474c4dd29f666145dee7b5dce56d024a26e9550c Mon Sep 17 00:00:00 2001
From: Francois Dugast
Date: Thu, 16 Jan 2025 13:45:32 +0100
Subject: [PATCH 09/16] drm/xe: Add missing SPDX license identifiers

Ensure all Xe driver files have a proper SPDX license identifier, add
it in files where it was missing.
Link: https://patchwork.freedesktop.org/patch/msgid/20250116124532.1480351-1-francois.dugast@intel.com
Signed-off-by: Francois Dugast
Reviewed-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/Kconfig.profile | 1 +
 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile
index ba17a25e8db3..7530df998148 100644
--- a/drivers/gpu/drm/xe/Kconfig.profile
+++ b/drivers/gpu/drm/xe/Kconfig.profile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE_JOB_TIMEOUT_MAX
 int "Default max job timeout (ms)"
 default 10000 # milliseconds
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
index 9c4cf050059a..41d39d67817a 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
@@ -1,3 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
 #ifndef _I915_GEM_STOLEN_H_
 #define _I915_GEM_STOLEN_H_
--
2.51.0


From bbd8429264baf8bc3c40cefda048560ae0eb7890 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko
Date: Thu, 14 Nov 2024 18:59:54 +0100
Subject: [PATCH 10/16] drm/xe: Always setup GT MMIO adjustment data

While we believed that xe_gt_mmio_init() would be called just once per
GT, this might not be the case due to some tweaks that need to be
performed by the VF driver during early probe. To avoid leaving any
stale data in case of a re-run, reset the GT MMIO adjustment data for
the non-media GT case.

Signed-off-by: Michal Wajdeczko
Cc: Matt Roper
Reviewed-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20241114175955.2299-2-michal.wajdeczko@intel.com
---
 drivers/gpu/drm/xe/xe_gt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 26e64530ada2..b5c313a3e946 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -643,6 +643,9 @@ void xe_gt_mmio_init(struct xe_gt *gt)
 if (gt->info.type == XE_GT_TYPE_MEDIA) {
 gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
 gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
+ } else {
+ gt->mmio.adj_offset = 0;
+ gt->mmio.adj_limit = 0;
 }
 if (IS_SRIOV_VF(gt_to_xe(gt)))
--
2.51.0


From 13265fe7426ec9ba5aa86baab913417ca361e8a4 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko
Date: Tue, 14 Jan 2025 22:13:47 +0100
Subject: [PATCH 11/16] drm/xe/vf: Perform early GT MMIO initialization to read GMDID
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

VFs need to communicate with the GuC to obtain the GMDID value, and the
existing GuC functions used for that assume that the GT already has its
MMIO members set up. However, due to recent refactoring, gt->mmio is
initialized later, and any attempt by the VF to use xe_mmio_read|write()
from GuC functions will lead to an NPD crash due to an unset MMIO
register address:

 [] xe 0000:00:02.1: [drm] Running in SR-IOV VF mode
 [] xe 0000:00:02.1: [drm] GT0: sending H2G MMIO 0x5507
 [] BUG: unable to handle page fault for address: 0000000000190240

Since we are already tweaking the id and type of the primary GT to mimic
a Media GT before initializing the GuC communication, we can also call
xe_gt_mmio_init() to perform early setup of gt->mmio, which will make
those GuC functions work again.
Signed-off-by: Michal Wajdeczko
Cc: Matt Roper
Cc: Piotr Piórkowski
Reviewed-by: Piotr Piórkowski
Link: https://patchwork.freedesktop.org/patch/msgid/20250114211347.1083-1-michal.wajdeczko@intel.com
---
 drivers/gpu/drm/xe/xe_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 48d1c81d441e..bf35a18bf5e7 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -501,6 +501,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
 gt->info.type = XE_GT_TYPE_MAIN;
 }
+ xe_gt_mmio_init(gt);
 xe_guc_comm_init_early(&gt->uc.guc);
 /* Don't bother with GMDID if failed to negotiate the GuC ABI */
--
2.51.0


From 9cd3f4efc870463f17f6c29114c61fb6bfcaa291 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko
Date: Fri, 20 Dec 2024 20:41:54 +0100
Subject: [PATCH 12/16] drm/xe/sa: Always call drm_suballoc_manager_fini()

After a successful call to drm_suballoc_manager_init() we should make
sure to call drm_suballoc_manager_fini(), as it may include some cleanup
code even if we never started using the manager for real.

As we can abort init() early due to a kvzalloc() failure, we should
either explicitly call drm_suballoc_manager_fini() or, even better,
postpone drm_suballoc_manager_init() until we have finished all other
preparation steps, so we can rely on fini() to do the cleanup.

Signed-off-by: Michal Wajdeczko
Cc: Matthew Brost
Reviewed-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20241220194205.995-2-michal.wajdeczko@intel.com
---
 drivers/gpu/drm/xe/xe_sa.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index e055bed7ae55..4e7aba445ebc 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -57,8 +57,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
 }
 sa_manager->bo = bo;
 sa_manager->is_iomem = bo->vmap.is_iomem;
-
- drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
 sa_manager->gpu_addr = xe_bo_ggtt_addr(bo);
 if (bo->vmap.is_iomem) {
@@ -72,6 +70,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
 memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
 }
+ drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
 ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, sa_manager);
 if (ret)
--
2.51.0


From d29cddd49bed2c880e7c17724bcf3604e865c23a Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko
Date: Fri, 20 Dec 2024 20:41:55 +0100
Subject: [PATCH 13/16] drm/xe/sa: Drop redundant NULL assignments

The sa_manager is drmm_kzalloc'ed, so all members are already zero. And
in case of a kvzalloc() failure we are not returning a pointer to the
sa_manager at all, so there is no point in resetting the .bo member.
Signed-off-by: Michal Wajdeczko
Cc: Matthew Brost
Reviewed-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20241220194205.995-3-michal.wajdeczko@intel.com
---
 drivers/gpu/drm/xe/xe_sa.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 4e7aba445ebc..eb314ca75355 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -44,8 +44,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
 if (!sa_manager)
 return ERR_PTR(-ENOMEM);
- sa_manager->bo = NULL;
-
 bo = xe_managed_bo_create_pin_map(xe, tile, size,
 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 XE_BO_FLAG_GGTT |
@@ -61,10 +59,8 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
 if (bo->vmap.is_iomem) {
 sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL);
- if (!sa_manager->cpu_ptr) {
- sa_manager->bo = NULL;
+ if (!sa_manager->cpu_ptr)
 return ERR_PTR(-ENOMEM);
- }
 } else {
 sa_manager->cpu_ptr = bo->vmap.vaddr;
 memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
--
2.51.0


From 97ee0e351f6ebbcb2a2dccdff726f75f728fede8 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko
Date: Fri, 20 Dec 2024 20:41:56 +0100
Subject: [PATCH 14/16] drm/xe/sa: Improve error message on init failure

Instead of a raw errno value we can print a friendly error code, and
also print the size of the buffer object that we failed to prepare.

Signed-off-by: Michal Wajdeczko
Cc: Matthew Brost
Reviewed-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20241220194205.995-4-michal.wajdeczko@intel.com
---
 drivers/gpu/drm/xe/xe_sa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index eb314ca75355..0b87599759d7 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -49,8 +49,8 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
 XE_BO_FLAG_GGTT |
 XE_BO_FLAG_GGTT_INVALIDATE);
 if (IS_ERR(bo)) {
- drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
- PTR_ERR(bo));
+ drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
+ size / SZ_1K, bo);
 return ERR_CAST(bo);
 }
 sa_manager->bo = bo;
--
2.51.0


From 7e937cdf18164ea276ce0f4bbc5755e0031280e0 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko
Date: Fri, 20 Dec 2024 20:41:57 +0100
Subject: [PATCH 15/16] drm/xe/sa: Tidy up coding style in init()

There is no need to use tile_to_xe() since we already have the xe. And
we should keep all variable declarations together; there is no need for
a separate sa_manager declaration.
Signed-off-by: Michal Wajdeczko
Cc: Matthew Brost
Reviewed-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20241220194205.995-5-michal.wajdeczko@intel.com
---
 drivers/gpu/drm/xe/xe_sa.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 0b87599759d7..0e78ae46667e 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -34,13 +34,12 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
 struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
 {
 struct xe_device *xe = tile_to_xe(tile);
+ struct xe_sa_manager *sa_manager;
 u32 managed_size = size - SZ_4K;
 struct xe_bo *bo;
 int ret;
- struct xe_sa_manager *sa_manager = drmm_kzalloc(&tile_to_xe(tile)->drm,
- sizeof(*sa_manager),
- GFP_KERNEL);
+ sa_manager = drmm_kzalloc(&xe->drm, sizeof(*sa_manager), GFP_KERNEL);
 if (!sa_manager)
 return ERR_PTR(-ENOMEM);
--
2.51.0


From 0e1871f61e71d7611196b04d1b133f18fef666dd Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko
Date: Fri, 20 Dec 2024 20:41:58 +0100
Subject: [PATCH 16/16] drm/xe/sa: Allow making suballocations using custom gfp flags

The current xe_sa_manager implementation uses a hardcoded GFP_KERNEL
flag during creation of suballocations, but in an upcoming patch we want
to reuse the xe_sa_manager in places where GFP_KERNEL is not allowed.
Add another variant of the xe_sa_bo_new() function that accepts
arbitrary gfp flags.

Signed-off-by: Michal Wajdeczko
Cc: Matthew Brost
Reviewed-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20241220194205.995-6-michal.wajdeczko@intel.com
---
 drivers/gpu/drm/xe/xe_sa.c | 15 ++++++++++++---
 drivers/gpu/drm/xe/xe_sa.h | 19 +++++++++++++++++--
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 0e78ae46667e..5f89e32b0640 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -74,8 +74,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
 return sa_manager;
 }
-struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
- unsigned int size)
+/**
+ * __xe_sa_bo_new() - Make a suballocation but use custom gfp flags.
+ * @sa_manager: the &xe_sa_manager
+ * @size: number of bytes we want to suballocate
+ * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL.
+ *
+ * Try to make a suballocation of size @size.
+ *
+ * Return: a &drm_suballoc, or an ERR_PTR.
+ */
+struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp)
 {
 /*
 * BB to large, return -ENOBUFS indicating user should split
@@ -84,7 +93,7 @@ struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
 if (size > sa_manager->base.size)
 return ERR_PTR(-ENOBUFS);
- return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
+ return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0);
 }
 void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
index 4e96483057d7..a0341eafbe77 100644
--- a/drivers/gpu/drm/xe/xe_sa.h
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -5,6 +5,7 @@
 #ifndef _XE_SA_H_
 #define _XE_SA_H_
+#include
 #include "xe_sa_types.h"
 struct dma_fence;
@@ -13,8 +14,22 @@ struct xe_tile;
 struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align);
-struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
- u32 size);
+struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp);
+
+/**
+ * xe_sa_bo_new() - Make a suballocation.
+ * @sa_manager: the &xe_sa_manager
+ * @size: number of bytes we want to suballocate
+ *
+ * Try to make a suballocation of size @size.
+ *
+ * Return: a &drm_suballoc, or an ERR_PTR.
+ */
+static inline struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size)
+{
+ return __xe_sa_bo_new(sa_manager, size, GFP_KERNEL);
+}
+
 void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo);
 void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence);
--
2.51.0
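
[Editor's note, not part of the series: the sketch below is a minimal, hypothetical
illustration of why patch 16 adds the gfp-aware __xe_sa_bo_new() variant, i.e. a caller
that must not sleep. Only __xe_sa_bo_new(), xe_sa_bo_free() and their signatures come
from the patches above; the function name, the 64-byte size and the GFP_ATOMIC choice
are illustrative assumptions, and the future in-tree user may pick a different gfp mask.]

   #include <linux/err.h>
   #include <linux/gfp.h>

   #include "xe_sa.h"

   /* Hypothetical caller running in a context where GFP_KERNEL is not allowed. */
   static int example_suballoc_nowait(struct xe_sa_manager *sa_manager)
   {
           struct drm_suballoc *sa_bo;

           /* Use GFP_ATOMIC instead of the GFP_KERNEL used by xe_sa_bo_new() */
           sa_bo = __xe_sa_bo_new(sa_manager, 64, GFP_ATOMIC);
           if (IS_ERR(sa_bo))
                   return PTR_ERR(sa_bo);

           /* ... fill the suballocation with commands/data here ... */

           /* No fence is attached in this sketch, so release it right away */
           xe_sa_bo_free(sa_bo, NULL);
           return 0;
   }

With a non-blocking gfp mask, drm_suballoc_new() will not wait for space to become
available, so such a caller must be prepared to handle an allocation failure instead
of sleeping, which is exactly the trade-off the new variant exposes.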