#include "edac_core.h"
#include "edac_module.h"
+#define CREATE_TRACE_POINTS
+#define TRACE_INCLUDE_PATH ../../include/ras
+#include <ras/ras_event.h>
+
/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);
* which will perform kobj unregistration and the actual free
* will occur during the kobject callback operation
*/
+
return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
}
#define OTHER_LABEL " or "
+
+/**
+ * edac_mc_handle_error - reports a memory event to userspace
+ *
+ * @type: severity of the error (CE/UE/Fatal)
+ * @mci: a struct mem_ctl_info pointer
+ * @page_frame_number: mem page where the error occurred
+ * @offset_in_page: offset of the error inside the page
+ * @syndrome: ECC syndrome
+ * @layer0: Memory layer0 position
+ * @layer1: Memory layer1 position
+ * @layer2: Memory layer2 position
+ * @msg: Message meaningful to the end users that
+ * explains the event
+ * @other_detail: Technical details about the event that
+ * may help hardware manufacturers and
+ * EDAC developers to analyse the event
+ * @arch_log: Architecture-specific struct that can
+ * be used to add extended information to the
+ * tracepoint, like dumping MCE registers.
+ */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
struct mem_ctl_info *mci,
const unsigned long page_frame_number,
const int layer2,
const char *msg,
const char *other_detail,
- const void *mcelog)
+ const void *arch_log)
{
/* FIXME: too much for stack: move it to some pre-alocated area */
char detail[80], location[80];
}
/* Memory type dependent details about the error */
- if (type == HW_EVENT_ERR_CORRECTED) {
+ if (type == HW_EVENT_ERR_CORRECTED)
snprintf(detail, sizeof(detail),
"page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
page_frame_number, offset_in_page,
grain, syndrome);
- edac_ce_error(mci, pos, msg, location, label, detail,
- other_detail, enable_per_layer_report,
- page_frame_number, offset_in_page, grain);
- } else {
+ else
snprintf(detail, sizeof(detail),
"page:0x%lx offset:0x%lx grain:%d",
page_frame_number, offset_in_page, grain);
+ /* Report the error via the trace interface */
+ trace_mc_event(type, mci->mc_idx, msg, label, location,
+ detail, other_detail);
+
+ /* Report the error via the edac_mc_printk() interface */
+ if (type == HW_EVENT_ERR_CORRECTED)
+ edac_ce_error(mci, pos, msg, location, label, detail,
+ other_detail, enable_per_layer_report,
+ page_frame_number, offset_in_page, grain);
+ else
edac_ue_error(mci, pos, msg, location, label, detail,
other_detail, enable_per_layer_report);
- }
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
--- /dev/null
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ras
+#define TRACE_INCLUDE_FILE ras_event
+
+#if !defined(_TRACE_HW_EVENT_MC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HW_EVENT_MC_H
+
+#include <linux/tracepoint.h>
+#include <linux/edac.h>
+#include <linux/ktime.h>
+
+/*
+ * Hardware Events Report
+ *
+ * These events are generated when the hardware detects a corrected or
+ * uncorrected event, and are meant to replace the current error-reporting
+ * APIs defined by both the EDAC and MCE subsystems.
+ *
+ * FIXME: Add events for handling memory errors originated from the
+ * MCE subsystem.
+ */
+
+/*
+ * Hardware-independent Memory Controller specific events
+ */
+
+/*
+ * Default error mechanisms for Memory Controller errors (CE and UE)
+ *
+ * mc_event records a single memory controller error as two integers
+ * and five strings.
+ *
+ * Arguments (see TP_PROTO):
+ *   err_type      - compared against HW_EVENT_ERR_CORRECTED and
+ *                   HW_EVENT_ERR_FATAL when the event is formatted; any
+ *                   other value is printed as "Uncorrected"
+ *   mc_index      - printed as "mc:<index>"
+ *   error_msg     - stored in the record as "msg" and printed right
+ *                   after "error:"
+ *   label         - printed in double quotes as the memory stick name
+ *   location      - first string printed after the mc index
+ *   core_detail   - stored in the record as "detail", printed after
+ *                   the location
+ *   driver_detail - last string printed inside the parentheses
+ *
+ * All five strings are copied into the trace record with __assign_str()
+ * and read back with __get_str() in TP_printk.
+ *
+ * NOTE(review): mc_index is declared as unsigned int but is formatted
+ * with %d in TP_printk; confirm whether %u was intended.
+ */
+TRACE_EVENT(mc_event,
+
+ TP_PROTO(const unsigned int err_type,
+ const unsigned int mc_index,
+ const char *error_msg,
+ const char *label,
+ const char *location,
+ const char *core_detail,
+ const char *driver_detail),
+
+ TP_ARGS(err_type, mc_index, error_msg, label, location,
+ core_detail, driver_detail),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, err_type )
+ __field( unsigned int, mc_index )
+ __string( msg, error_msg )
+ __string( label, label )
+ __string( detail, core_detail )
+ __string( location, location )
+ __string( driver_detail, driver_detail )
+ ),
+
+ TP_fast_assign(
+ __entry->err_type = err_type;
+ __entry->mc_index = mc_index;
+ __assign_str(msg, error_msg);
+ __assign_str(label, label);
+ __assign_str(location, location);
+ __assign_str(detail, core_detail);
+ __assign_str(driver_detail, driver_detail);
+ ),
+
+ TP_printk("%s error:%s on memory stick \"%s\" (mc:%d %s %s %s)",
+ (__entry->err_type == HW_EVENT_ERR_CORRECTED) ? "Corrected" :
+ ((__entry->err_type == HW_EVENT_ERR_FATAL) ?
+ "Fatal" : "Uncorrected"),
+ __get_str(msg),
+ __get_str(label),
+ __entry->mc_index,
+ __get_str(location),
+ __get_str(detail),
+ __get_str(driver_detail))
+);
+
+#endif /* _TRACE_HW_EVENT_MC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>