www.infradead.org Git - users/dwmw2/linux.git/commitdiff
drm/amdgpu: support utcl2 RAS poison query for mmhub
author: Tao Zhou <tao.zhou1@amd.com>
Mon, 11 Mar 2024 08:53:53 +0000 (16:53 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 20 Mar 2024 17:38:14 +0000 (13:38 -0400)
Support the utcl2 RAS poison status query for both gfxhub and mmhub,
and replace xcc_id with hub_inst.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c

index d5fde8adf19b096d4da70a6e0ee10cb8a970c4ee..8ee18c2c082a98542235ca465043bd400c12d373 100644 (file)
@@ -770,12 +770,19 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
 }
 
 bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
-                       int xcc_id)
+                       int hub_inst, int hub_type)
 {
-       if (adev->gfxhub.funcs->query_utcl2_poison_status)
-               return adev->gfxhub.funcs->query_utcl2_poison_status(adev, xcc_id);
-       else
-               return false;
+       if (!hub_type) {
+               if (adev->gfxhub.funcs->query_utcl2_poison_status)
+                       return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst);
+               else
+                       return false;
+       } else {
+               if (adev->mmhub.funcs->query_utcl2_poison_status)
+                       return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst);
+               else
+                       return false;
+       }
 }
 
 int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
index caee36e52a09ba0535bdbb72680950395c815cf0..6b67f00259663d9ed6b9a9b4027b72b6273b6f7a 100644 (file)
@@ -342,7 +342,7 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *
 void amdgpu_amdkfd_block_mmu_notifications(void *p);
 int amdgpu_amdkfd_criu_resume(void *p);
 bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
-                       int xcc_id);
+                       int hub_inst, int hub_type);
 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
                uint64_t size, u32 alloc_flag, int8_t xcp_id);
 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
index 05d52b9274a9d18c921a13d93d026165af487211..470a146f2f43a8e293110d4955eddadf791b1a27 100644 (file)
@@ -666,10 +666,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
        rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
        fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
 
-       /* for gfx fed error, kfd will handle it, return directly */
+       /* for fed error, kfd will handle it, return directly */
        if (fed && amdgpu_ras_is_poison_mode_supported(adev) &&
-           (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)) &&
-           (vmhub < AMDGPU_MMHUB0_START))
+           (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)))
                return 0;
 
        WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
index a8e76287dde0bbbe54c0de29979f7c3656d9a111..650da18b0d87b3289d4c8be3ce70f54e34f3ecf0 100644 (file)
@@ -369,18 +369,23 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
                uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
                uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
                uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry);
-               int xcc_id = 0;
+               int hub_inst = 0;
                struct kfd_hsa_memory_exception_data exception_data;
 
+               /* gfxhub */
                if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
-                       xcc_id = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
+                       hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
                                node_id);
-                       if (xcc_id < 0)
-                               xcc_id = 0;
+                       if (hub_inst < 0)
+                               hub_inst = 0;
                }
 
-               if (client_id == SOC15_IH_CLIENTID_UTCL2 && !vmid_type &&
-                   amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, xcc_id)) {
+               /* mmhub */
+               if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
+                       hub_inst = node_id / 4;
+
+               if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev,
+                                       hub_inst, vmid_type)) {
                        event_interrupt_poison_consumption(dev, pasid, client_id);
                        return;
                }
index ff73923367957191b45d7aa669b4f103df5335b1..11641f4645e6c1e049ec7ba735352615436e03b9 100644 (file)
@@ -415,18 +415,23 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
                uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
                uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
                uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry);
-               int xcc_id = 0;
+               int hub_inst = 0;
                struct kfd_hsa_memory_exception_data exception_data;
 
+               /* gfxhub */
                if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
-                       xcc_id = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
+                       hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
                                node_id);
-                       if (xcc_id < 0)
-                               xcc_id = 0;
+                       if (hub_inst < 0)
+                               hub_inst = 0;
                }
 
-               if (client_id == SOC15_IH_CLIENTID_UTCL2 && !vmid_type &&
-                   amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, xcc_id)) {
+               /* mmhub */
+               if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
+                       hub_inst = node_id / 4;
+
+               if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev,
+                                       hub_inst, vmid_type)) {
                        event_interrupt_poison_consumption_v9(dev, pasid, client_id);
                        return;
                }