www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
drm/xe/bmg: implement Wa_16023588340
author Matthew Auld <matthew.auld@intel.com>
Wed, 3 Jul 2024 12:43:38 +0000 (13:43 +0100)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Mon, 19 Aug 2024 17:30:41 +0000 (13:30 -0400)
This involves enabling L2 caching of host side memory access to VRAM
through the CPU BAR. The main fallout here is with display since VRAM
writes from CPU can now be cached in GPU L2, and display is never
coherent with caches, so it needs various manual flushing. In the case
of FBC we disable it (in a later patch) due to complications in getting
this to work correctly.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Vinod Govindapillai <vinod.govindapillai@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240703124338.208220-3-matthew.auld@intel.com
(cherry picked from commit 01570b446939c3538b1aa3d059837f49fa14a3ae)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/Makefile
drivers/gpu/drm/xe/display/xe_dsb_buffer.c
drivers/gpu/drm/xe/display/xe_fb_pin.c
drivers/gpu/drm/xe/regs/xe_gt_regs.h
drivers/gpu/drm/xe/xe_device.c
drivers/gpu/drm/xe/xe_device.h
drivers/gpu/drm/xe/xe_gt.c
drivers/gpu/drm/xe/xe_pat.c
drivers/gpu/drm/xe/xe_wa_oob.rules

index 628c245c4822ebff7494e4764c93556eecf6cd8e..e97c9da451b36cfd93dcbf04a489ebcc839dabb2 100644 (file)
@@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
 
 uses_generated_oob := \
        $(obj)/xe_ggtt.o \
+       $(obj)/xe_device.o \
        $(obj)/xe_gsc.o \
        $(obj)/xe_gt.o \
        $(obj)/xe_guc.o \
        $(obj)/xe_guc_ads.o \
        $(obj)/xe_guc_pc.o \
        $(obj)/xe_migrate.o \
+       $(obj)/xe_pat.o \
        $(obj)/xe_ring_ops.o \
        $(obj)/xe_vm.o \
        $(obj)/xe_wa.o \
index 9e860c61f4b33f7ed19a579ab47406d6ec47ffb9..ccd0d87d438a3af400afb1b1d089f4f46b6fefcb 100644 (file)
@@ -7,6 +7,8 @@
 #include "intel_display_types.h"
 #include "intel_dsb_buffer.h"
 #include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
 #include "xe_gt.h"
 
 u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
@@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
 
 void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
 {
+       struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
        iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
+       xe_device_l2_flush(xe);
 }
 
 u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
 
 void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
 {
+       struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
        WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
 
        iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
+       xe_device_l2_flush(xe);
 }
 
 bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
index 423f367c7065ed0386664b91c44a017aa22e96b6..d7db44e79eaf55f5227563c634dcb97e7735a6aa 100644 (file)
@@ -10,6 +10,7 @@
 #include "intel_fb.h"
 #include "intel_fb_pin.h"
 #include "xe_bo.h"
+#include "xe_device.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_pm.h"
@@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
        if (ret)
                goto err_unpin;
 
+       /* Ensure DPT writes are flushed */
+       xe_device_l2_flush(xe);
        return vma;
 
 err_unpin:
index d44564bad00949ae6b37fe7ece8162ddb62cc80a..fd9d94174efb126b9e55c5841d5ba4b218885062 100644 (file)
@@ -80,6 +80,9 @@
 #define   LE_CACHEABILITY_MASK                 REG_GENMASK(1, 0)
 #define   LE_CACHEABILITY(value)               REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
 
+#define XE2_GAMREQSTRM_CTRL                    XE_REG(0x4194)
+#define   CG_DIS_CNTLBUS                       REG_BIT(6)
+
 #define CCS_AUX_INV                            XE_REG(0x4208)
 
 #define VD0_AUX_INV                            XE_REG(0x4218)
 
 #define XEHPC_L3CLOS_MASK(i)                   XE_REG_MCR(0xb194 + (i) * 8)
 
+#define XE2_GLOBAL_INVAL                       XE_REG(0xb404)
+
+#define SCRATCH1LPFC                           XE_REG(0xb474)
+#define   EN_L3_RW_CCS_CACHE_FLUSH             REG_BIT(0)
+
 #define XE2LPM_L3SQCREG5                       XE_REG_MCR(0xb658)
 
 #define XE2_TDF_CTRL                           XE_REG(0xb418)
index f2f1d8ddb22138cb0e04df0635b01478b6a14bb2..6ce44ca2524de0c261a271b56551784495973cde 100644 (file)
@@ -54,6 +54,9 @@
 #include "xe_vm.h"
 #include "xe_vram.h"
 #include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
 
 static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 {
@@ -820,6 +823,11 @@ void xe_device_td_flush(struct xe_device *xe)
        if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
                return;
 
+       if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+               xe_device_l2_flush(xe);
+               return;
+       }
+
        for_each_gt(gt, xe, id) {
                if (xe_gt_is_media_type(gt))
                        continue;
@@ -843,6 +851,28 @@ void xe_device_td_flush(struct xe_device *xe)
        }
 }
 
+void xe_device_l2_flush(struct xe_device *xe)
+{
+       struct xe_gt *gt;
+       int err;
+
+       gt = xe_root_mmio_gt(xe);
+
+       if (!XE_WA(gt, 16023588340))
+               return;
+
+       err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+       if (err)
+               return;
+
+       xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+
+       if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
+               xe_gt_err_once(gt, "Global invalidation timeout\n");
+
+       xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 {
        return xe_device_has_flat_ccs(xe) ?
index b3952718b3c1cb165521960d84fda91d3ce1be9e..533ccfb2567a2c320699cc1136dfaa4d4a45dce4 100644 (file)
@@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
 u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
 
 void xe_device_td_flush(struct xe_device *xe);
+void xe_device_l2_flush(struct xe_device *xe);
 
 static inline bool xe_device_wedged(struct xe_device *xe)
 {
index 31b2e64c70c6abf9bc489eaf41d9f30b997ab766..816ecc9e294ce99cee633bc226ec32c7dedc28d4 100644 (file)
@@ -11,6 +11,8 @@
 #include <drm/xe_drm.h>
 #include <generated/xe_wa_oob.h>
 
+#include <generated/xe_wa_oob.h>
+
 #include "instructions/xe_gfxpipe_commands.h"
 #include "instructions/xe_mi_commands.h"
 #include "regs/xe_gt_regs.h"
@@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
        gt->uc.guc.submission_state.enabled = false;
 }
 
+static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
+{
+       u32 reg;
+       int err;
+
+       if (!XE_WA(gt, 16023588340))
+               return;
+
+       err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+       if (WARN_ON(err))
+               return;
+
+       if (!xe_gt_is_media_type(gt)) {
+               xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
+               reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+               reg |= CG_DIS_CNTLBUS;
+               xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+       }
+
+       xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
+       xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
+{
+       u32 reg;
+       int err;
+
+       if (!XE_WA(gt, 16023588340))
+               return;
+
+       if (xe_gt_is_media_type(gt))
+               return;
+
+       err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+       if (WARN_ON(err))
+               return;
+
+       reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+       reg &= ~CG_DIS_CNTLBUS;
+       xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+
+       xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
 /**
  * xe_gt_remove() - Clean up the GT structures before driver removal
  * @gt: the GT object
@@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
 
        for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
                xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+
+       xe_gt_disable_host_l2_vram(gt);
 }
 
 static void gt_reset_worker(struct work_struct *w);
@@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
 
        xe_gt_mcr_init_early(gt);
        xe_pat_init(gt);
+       xe_gt_enable_host_l2_vram(gt);
 
        err = xe_uc_init(&gt->uc);
        if (err)
@@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
 
        xe_pat_init(gt);
 
+       xe_gt_enable_host_l2_vram(gt);
+
        xe_gt_mcr_set_implicit_defaults(gt);
        xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
 
@@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
 
        xe_gt_idle_disable_pg(gt);
 
+       xe_gt_disable_host_l2_vram(gt);
+
        XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
        xe_gt_dbg(gt, "suspended\n");
 
index 4ee32ee1cc885fc939a81cc41599726b030d845c..722278cc23fc5d7058452a1a3d1b472bf8bbffa4 100644 (file)
@@ -7,6 +7,8 @@
 
 #include <drm/xe_drm.h>
 
+#include <generated/xe_wa_oob.h>
+
 #include "regs/xe_reg_defs.h"
 #include "xe_assert.h"
 #include "xe_device.h"
@@ -15,6 +17,7 @@
 #include "xe_gt_mcr.h"
 #include "xe_mmio.h"
 #include "xe_sriov.h"
+#include "xe_wa.h"
 
 #define _PAT_ATS                               0x47fc
 #define _PAT_INDEX(index)                      _PICK_EVEN_2RANGES(index, 8, \
@@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
        if (GRAPHICS_VER(xe) == 20) {
                xe->pat.ops = &xe2_pat_ops;
                xe->pat.table = xe2_pat_table;
-               xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+
+               /* Wa_16023588340. XXX: Should use XE_WA */
+               if (GRAPHICS_VERx100(xe) == 2001)
+                       xe->pat.n_entries = 28; /* Disable CLOS3 */
+               else
+                       xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+
                xe->pat.idx[XE_CACHE_NONE] = 3;
                xe->pat.idx[XE_CACHE_WT] = 15;
                xe->pat.idx[XE_CACHE_WB] = 2;
index 26066beb4f6f5dc813fc8e4ad58045563dc51c30..08f7336881e32dfd083c328c879f3fbfc48be495 100644 (file)
@@ -29,3 +29,4 @@
 13011645652    GRAPHICS_VERSION(2004)
 22019338487    MEDIA_VERSION(2000)
                GRAPHICS_VERSION(2001)
+16023588340    GRAPHICS_VERSION(2001)