struct amdgpu_ras_block_object *block_obj =
amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int ret;
if (!block_obj || !con)
return;
+ ret = amdgpu_ras_mark_ras_event(adev, type);
+ if (ret)
+ return;
+
/* both query_poison_status and handle_poison_consumption are optional,
* but at least one of them should be implemented if we need poison
* consumption handler
* For RMA case, amdgpu_umc_poison_handler will handle gpu reset.
*/
if (poison_stat && !con->is_rma) {
- dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
- block_obj->ras_comm.name);
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id,
+ "GPU reset for %s RAS poison consumption is issued!\n",
+ block_obj->ras_comm.name);
amdgpu_ras_reset_gpu(adev);
}
if (amdgpu_ras_intr_triggered())
return RAS_EVENT_TYPE_FATAL;
else
- return RAS_EVENT_TYPE_INVALID;
+ return RAS_EVENT_TYPE_POISON_CONSUMPTION;
}
static void amdgpu_ras_do_recovery(struct work_struct *work)
switch (type) {
case RAS_EVENT_TYPE_FATAL:
case RAS_EVENT_TYPE_POISON_CREATION:
+ case RAS_EVENT_TYPE_POISON_CONSUMPTION:
event_mgr = __get_ras_event_mgr(adev);
if (!event_mgr)
return RAS_EVENT_INVALID_ID;
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
+#include "amdgpu_ras.h"
/*
* GFX9 SQ Interrupts
uint16_t pasid, uint16_t client_id)
{
enum amdgpu_ras_block block = 0;
- int old_poison;
uint32_t reset = 0;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int old_poison, ret;
if (!p)
return;
return;
}
+ ret = amdgpu_ras_mark_ras_event(dev->adev, type);
+ if (ret)
+ return;
+
kfd_signal_poison_consumed_event(dev, pasid);
- dev_warn(dev->adev->dev,
- "poison is consumed by client %d, kick off gpu reset flow\n", client_id);
+ event_id = amdgpu_ras_acquire_event_id(dev->adev, type);
+
+ RAS_EVENT_LOG(dev->adev, event_id,
+ "poison is consumed by client %d, kick off gpu reset flow\n", client_id);
amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev,
block, pasid, NULL, NULL, reset);