if WITH_EXTLOG
rasdaemon_SOURCES += ras-extlog-handler.c
endif
+if WITH_DEVLINK
+ rasdaemon_SOURCES += ras-devlink-handler.c
+endif
if WITH_ABRT_REPORT
rasdaemon_SOURCES += ras-report.c
endif
include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \
ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
- ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h
+ ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
+ ras-devlink-handler.h
# This rule can't be called with more than one Makefile job (like make -j8)
# I can't figure out a way to fix that
])
AM_CONDITIONAL([WITH_EXTLOG], [test x$enable_extlog = xyes])
+dnl Optional devlink health report support, selected with --enable-devlink.
+dnl When enabled, HAVE_DEVLINK is defined for the C sources and the
+dnl WITH_DEVLINK automake conditional becomes true.
+AC_ARG_ENABLE([devlink],
+ AS_HELP_STRING([--enable-devlink], [enable devlink health events (currently experimental)]))
+
+AS_IF([test "x$enable_devlink" = "xyes"], [
+ AC_DEFINE(HAVE_DEVLINK,1,"have devlink health events collect")
+ AC_SUBST([WITH_DEVLINK])
+])
+AM_CONDITIONAL([WITH_DEVLINK], [test x$enable_devlink = xyes])
+
AC_ARG_ENABLE([abrt_report],
AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)]))
ABRT report : $enable_abrt_report
HIP07 SAS HW errors : $enable_hisi_ns_decode
ARM events : $enable_arm
+ DEVLINK : $enable_devlink
EOF
--- /dev/null
+/*
+ * Copyright (C) 2019 Cong Wang <xiyou.wangcong@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "libtrace/kbuffer.h"
+#include "ras-devlink-handler.h"
+#include "ras-record.h"
+#include "ras-logger.h"
+#include "ras-report.h"
+
+/*
+ * ras_devlink_event_handler - handle one devlink:devlink_health_report event
+ * @s:       trace sequence that receives the formatted timestamp
+ * @record:  raw trace record; its timestamp is used when ras->use_uptime is set
+ * @event:   event format used to look up the fields of @record
+ * @context: the struct ras_events instance of the daemon
+ *
+ * Fills a struct devlink_event with a formatted timestamp and the bus_name,
+ * dev_name, driver_name, reporter_name and msg fields of the record, then
+ * passes it to the SQLite store (HAVE_SQLITE3) and to the ABRT reporter
+ * (HAVE_ABRT_REPORT) when those are compiled in. The results of the store
+ * and report calls are not checked.
+ *
+ * Returns 0 on success, or -1 as soon as pevent_get_field_raw() returns
+ * NULL for one of the fields.
+ */
+int ras_devlink_event_handler(struct trace_seq *s,
+			      struct pevent_record *record,
+			      struct event_format *event, void *context)
+{
+	int len;
+	struct ras_events *ras = context;
+	time_t now;
+	struct tm *tm;
+	struct devlink_event ev;
+
+	/*
+	 * Newer kernels (3.10-rc1 or upper) provide an uptime clock.
+	 * On previous kernels, the way to properly generate an event would
+	 * be to inject a fake one, measure its timestamp and diff it against
+	 * gettimeofday. We won't do it here. Instead, let's use uptime,
+	 * falling back to the time at which the event is handled if the
+	 * "uptime" clock is not available (legacy kernels).
+	 */
+
+	if (ras->use_uptime)
+		now = record->ts/user_hz + ras->uptime_diff;
+	else
+		now = time(NULL);
+
+	/*
+	 * localtime() may return NULL and strftime() returns 0 when the
+	 * result does not fit. In both cases ev.timestamp would be left
+	 * uninitialized, so store a fixed placeholder instead of printing
+	 * and recording stack garbage.
+	 */
+	tm = localtime(&now);
+	if (!tm || !strftime(ev.timestamp, sizeof(ev.timestamp),
+			     "%Y-%m-%d %H:%M:%S %z", tm))
+		snprintf(ev.timestamp, sizeof(ev.timestamp), "%s",
+			 "1970-01-01 00:00:00 +0000");
+	trace_seq_printf(s, "%s ", ev.timestamp);
+
+	ev.bus_name = pevent_get_field_raw(s, event, "bus_name",
+					   record, &len, 1);
+	if (!ev.bus_name)
+		return -1;
+
+	ev.dev_name = pevent_get_field_raw(s, event, "dev_name",
+					   record, &len, 1);
+	if (!ev.dev_name)
+		return -1;
+
+	ev.driver_name = pevent_get_field_raw(s, event, "driver_name",
+					      record, &len, 1);
+	if (!ev.driver_name)
+		return -1;
+
+	ev.reporter_name = pevent_get_field_raw(s, event, "reporter_name",
+						record, &len, 1);
+	if (!ev.reporter_name)
+		return -1;
+
+	ev.msg = pevent_get_field_raw(s, event, "msg", record, &len, 1);
+	if (!ev.msg)
+		return -1;
+
+	/* Insert data into the SGBD */
+#ifdef HAVE_SQLITE3
+	ras_store_devlink_event(ras, &ev);
+#endif
+
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_devlink_event(ras, &ev);
+#endif
+
+	return 0;
+}
--- /dev/null
+/*
+ * Copyright (C) 2019 Cong Wang <xiyou.wangcong@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef __RAS_DEVLINK_HANDLER_H
+#define __RAS_DEVLINK_HANDLER_H
+
+#include "ras-events.h"
+#include "libtrace/event-parse.h"
+
+/*
+ * Trace event handler for devlink:devlink_health_report.
+ *
+ * @context is used as the daemon's struct ras_events. Returns 0 once the
+ * event has been collected, or -1 when one of the event fields (bus_name,
+ * dev_name, driver_name, reporter_name, msg) cannot be fetched.
+ */
+int ras_devlink_event_handler(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event, void *context);
+
+#endif
#include "ras-arm-handler.h"
#include "ras-mce-handler.h"
#include "ras-extlog-handler.h"
+#include "ras-devlink-handler.h"
#include "ras-record.h"
#include "ras-logger.h"
rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable);
#endif
+#ifdef HAVE_DEVLINK
+ rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable);
+#endif
+
free_ras:
free(ras);
return rc;
"ras", "aer_event");
#endif
+#ifdef HAVE_DEVLINK
+ rc = add_event_handler(ras, pevent, page_size, "devlink",
+ "devlink_health_report",
+ ras_devlink_event_handler);
+ if (!rc)
+ num_events++;
+ else
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+ "devlink", "devlink_health_report");
+#endif
+
if (!num_events) {
log(ALL, LOG_INFO,
"Failed to trace all supported RAS events. Aborting.\n");
}
#endif
+/*
+ * Table and functions to handle devlink:devlink_health_report
+ */
+
+#ifdef HAVE_DEVLINK
+/*
+ * Columns of the devlink_event table. After the "id" key, the column order
+ * matches the parameter indexes 1..6 bound by ras_store_devlink_event().
+ */
+static const struct db_fields devlink_event_fields[] = {
+ { .name="id", .type="INTEGER PRIMARY KEY" },
+ { .name="timestamp", .type="TEXT" },
+ { .name="bus_name", .type="TEXT" },
+ { .name="dev_name", .type="TEXT" },
+ { .name="driver_name", .type="TEXT" },
+ { .name="reporter_name", .type="TEXT" },
+ { .name="msg", .type="TEXT" },
+};
+
+/*
+ * Descriptor of the "devlink_event" table; it is the argument given to
+ * ras_mc_create_table() and ras_mc_prepare_stmt() when the database is
+ * opened.
+ */
+static const struct db_table_descriptor devlink_event_tab = {
+ .name = "devlink_event",
+ .fields = devlink_event_fields,
+ .num_fields = ARRAY_SIZE(devlink_event_fields),
+};
+
+/*
+ * Insert one devlink event into the devlink_event table.
+ *
+ * The timestamp, bus_name, dev_name, driver_name, reporter_name and msg
+ * members of @ev are bound, in that order, to parameters 1..6 of the
+ * prepared statement, which is then stepped and reset.
+ *
+ * Returns 0 when no database handle or no prepared statement is available,
+ * otherwise the value returned by sqlite3_reset().
+ */
+int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev)
+{
+	struct sqlite3_priv *priv = ras->db_priv;
+	sqlite3_stmt *stmt;
+	int rc;
+
+	if (!priv)
+		return 0;
+
+	stmt = priv->stmt_devlink_event;
+	if (!stmt)
+		return 0;
+
+	log(TERM, LOG_INFO, "devlink_event store: %p\n", stmt);
+
+	/* No destructor is passed: the strings stay owned by the caller. */
+	sqlite3_bind_text(stmt, 1, ev->timestamp, -1, NULL);
+	sqlite3_bind_text(stmt, 2, ev->bus_name, -1, NULL);
+	sqlite3_bind_text(stmt, 3, ev->dev_name, -1, NULL);
+	sqlite3_bind_text(stmt, 4, ev->driver_name, -1, NULL);
+	sqlite3_bind_text(stmt, 5, ev->reporter_name, -1, NULL);
+	sqlite3_bind_text(stmt, 6, ev->msg, -1, NULL);
+
+	rc = sqlite3_step(stmt);
+	if (!(rc == SQLITE_OK || rc == SQLITE_DONE))
+		log(TERM, LOG_ERR,
+		    "Failed to do devlink_event step on sqlite: error = %d\n", rc);
+
+	/* The statement is reset even after a failed step so it can be reused. */
+	rc = sqlite3_reset(stmt);
+	if (!(rc == SQLITE_OK || rc == SQLITE_DONE))
+		log(TERM, LOG_ERR,
+		    "Failed reset devlink_event on sqlite: error = %d\n",
+		    rc);
+
+	log(TERM, LOG_INFO, "register inserted at db\n");
+
+	return rc;
+}
+#endif
/*
* Generic code
rc = ras_mc_prepare_stmt(priv, &priv->stmt_arm_record,
&arm_event_tab);
#endif
+#ifdef HAVE_DEVLINK
+ rc = ras_mc_create_table(priv, &devlink_event_tab);
+ if (rc == SQLITE_OK)
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_devlink_event,
+ &devlink_event_tab);
+#endif
ras->db_priv = priv;
return 0;
int32_t psci_state;
};
+/*
+ * One devlink:devlink_health_report event as collected by
+ * ras_devlink_event_handler().
+ *
+ * NOTE(review): the string members receive the pointers returned by
+ * pevent_get_field_raw(); presumably they point into the trace record and
+ * are not owned by this structure - confirm before keeping them around.
+ */
+struct devlink_event {
+ char timestamp[64]; /* formatted with "%Y-%m-%d %H:%M:%S %z" */
+ const char *bus_name; /* "bus_name" field of the event */
+ const char *dev_name; /* "dev_name" field of the event */
+ const char *driver_name; /* "driver_name" field of the event */
+ const char *reporter_name; /* "reporter_name" field of the event */
+ const char *msg; /* "msg" field of the event */
+};
+
struct ras_mc_event;
struct ras_aer_event;
struct ras_extlog_event;
struct ras_non_standard_event;
struct ras_arm_event;
struct mce_event;
+struct devlink_event;
#ifdef HAVE_SQLITE3
#ifdef HAVE_ARM
sqlite3_stmt *stmt_arm_record;
#endif
+#ifdef HAVE_DEVLINK
+ sqlite3_stmt *stmt_devlink_event;
+#endif
};
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras);
int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev);
int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev);
int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev);
+int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev);
#else
static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; };
static inline int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; };
static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; };
+static inline int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; };
#endif
return 0;
}
+/*
+ * Append the "BACKTRACE=" item describing a devlink event to @buf.
+ *
+ * The item lists the timestamp, bus_name, dev_name, driver_name,
+ * reporter_name and msg of @ev, one "key=value" pair per line. The event
+ * strings come from the trace record and have no length limit, so the text
+ * is formatted with a bounded snprintf() and truncated if it does not fit.
+ *
+ * @buf must be a NUL-terminated string. Its capacity is taken to be
+ * MAX_BACKTRACE_SIZE bytes, which is the size of the buffer that
+ * commit_report_backtrace() passes in.
+ *
+ * Returns 0 on success, or -1 when @buf or @ev is NULL or when @buf has no
+ * room left.
+ */
+static int set_devlink_event_backtrace(char *buf, struct devlink_event *ev){
+	char bt_buf[MAX_BACKTRACE_SIZE];
+	size_t used;
+
+	if(!buf || !ev)
+		return -1;
+
+	snprintf(bt_buf, sizeof(bt_buf), "BACKTRACE="
+		 "timestamp=%s\n"
+		 "bus_name=%s\n"
+		 "dev_name=%s\n"
+		 "driver_name=%s\n"
+		 "reporter_name=%s\n"
+		 "msg=%s\n",
+		 ev->timestamp,
+		 ev->bus_name,
+		 ev->dev_name,
+		 ev->driver_name,
+		 ev->reporter_name,
+		 ev->msg);
+
+	/* Never write past the end of the destination buffer. */
+	used = strlen(buf);
+	if(used >= sizeof(bt_buf) - 1)
+		return -1;
+	strncat(buf, bt_buf, sizeof(bt_buf) - 1 - used);
+
+	return 0;
+}
+
static int commit_report_backtrace(int sockfd, int type, void *ev){
char buf[MAX_BACKTRACE_SIZE];
char *pbuf = buf;
case ARM_EVENT:
rc = set_arm_event_backtrace(buf, (struct ras_arm_event *)ev);
break;
+ case DEVLINK_EVENT:
+ rc = set_devlink_event_backtrace(buf, (struct devlink_event *)ev);
+ break;
default:
return -1;
}
return -1;
}
}
+
+/*
+ * Report a devlink health event to ABRT.
+ *
+ * Opens the report socket, sends the basic items and the backtrace built
+ * for DEVLINK_EVENT, then the "ANALYZER" and "REASON" items. Each of these
+ * two items is written together with its terminating NUL byte. The socket
+ * is closed before returning. @ras is not used.
+ *
+ * Returns 0 when every item was sent completely, -1 otherwise.
+ */
+int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev){
+	char buf[MAX_MESSAGE_SIZE];
+	size_t len;
+	int sockfd;
+	int ret = -1;
+	int rc;
+
+	memset(buf, 0, sizeof(buf));
+
+	sockfd = setup_report_socket();
+	if(sockfd < 0){
+		return -1;
+	}
+
+	rc = commit_report_basic(sockfd);
+	if(rc < 0){
+		goto devlink_fail;
+	}
+
+	rc = commit_report_backtrace(sockfd, DEVLINK_EVENT, ev);
+	if(rc < 0){
+		goto devlink_fail;
+	}
+
+	/*
+	 * write() returns -1 on error. The result is checked for a negative
+	 * value before it is compared with the unsigned length, otherwise
+	 * -1 would be promoted to a huge value and the failure missed.
+	 */
+	snprintf(buf, sizeof(buf), "ANALYZER=%s", "rasdaemon-devlink");
+	len = strlen(buf) + 1;
+	rc = write(sockfd, buf, len);
+	if(rc < 0 || (size_t)rc < len){
+		goto devlink_fail;
+	}
+
+	snprintf(buf, sizeof(buf), "REASON=%s", "devlink health report problem");
+	len = strlen(buf) + 1;
+	rc = write(sockfd, buf, len);
+	if(rc < 0 || (size_t)rc < len){
+		goto devlink_fail;
+	}
+
+	ret = 0;
+
+devlink_fail:
+
+	/* sockfd is a valid descriptor here, 0 included. */
+	close(sockfd);
+
+	return ret;
+}
MCE_EVENT,
AER_EVENT,
NON_STANDARD_EVENT,
- ARM_EVENT
+ ARM_EVENT,
+ DEVLINK_EVENT
};
#ifdef HAVE_ABRT_REPORT
int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev);
int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev);
int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev);
+int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev);
#else
static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; };
static inline int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; };
static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; };
+static inline int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; };
#endif
}
$query_handle->finish;
+ # devlink errors
+ $query = "select dev_name, count(*) from devlink_event group by dev_name";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($dev_name, $count));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "\t$dev_name has $count errors\n";
+ }
+ if ($out ne "") {
+ print "Devlink records summary:\n$out";
+ } else {
+ print "No devlink errors.\n";
+ }
+ $query_handle->finish;
+
# MCE mce_record errors
$query = "select error_msg, count(*) from mce_record group by error_msg";
$query_handle = $dbh->prepare($query);
}
$query_handle->finish;
+ # devlink errors
+ $query = "select id, timestamp, bus_name, dev_name, driver_name, reporter_name, msg from devlink_event order by id";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($id, $timestamp, $bus_name, $dev_name, $driver_name, $reporter_name, $msg));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "$id $timestamp error: ";
+ $out .= "bus_name=$bus_name, ";
+ $out .= "dev_name=$dev_name, ";
+ $out .= "driver_name=$driver_name, ";
+ $out .= "reporter_name=$reporter_name, ";
+ $out .= "message='$msg', ";
+ $out .= "\n";
+ }
+ if ($out ne "") {
+ print "Devlink events:\n$out\n";
+ } else {
+ print "No devlink errors.\n\n";
+ }
+ $query_handle->finish;
+
# MCE mce_record errors
$query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id";
$query_handle = $dbh->prepare($query);