From d8489352cea0d01b2eeb2f39df010c72d45c299f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 25 Apr 2019 13:21:19 -0700 Subject: [PATCH] Add devlink events Signed-off-by: Cong Wang --- Makefile.am | 6 ++- configure.ac | 10 +++++ ras-devlink-handler.c | 93 +++++++++++++++++++++++++++++++++++++++++++ ras-devlink-handler.h | 29 ++++++++++++++ ras-events.c | 16 ++++++++ ras-record.c | 57 ++++++++++++++++++++++++++ ras-record.h | 15 +++++++ ras-report.c | 78 ++++++++++++++++++++++++++++++++++++ ras-report.h | 5 ++- util/ras-mc-ctl.in | 38 ++++++++++++++++++ 10 files changed, 345 insertions(+), 2 deletions(-) create mode 100644 ras-devlink-handler.c create mode 100644 ras-devlink-handler.h diff --git a/Makefile.am b/Makefile.am index 011ddb2..f036ffd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -42,6 +42,9 @@ endif if WITH_EXTLOG rasdaemon_SOURCES += ras-extlog-handler.c endif +if WITH_DEVLINK + rasdaemon_SOURCES += ras-devlink-handler.c +endif if WITH_ABRT_REPORT rasdaemon_SOURCES += ras-report.c endif @@ -52,7 +55,8 @@ rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ - ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h + ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ + ras-devlink-handler.h # This rule can't be called with more than one Makefile job (like make -j8) # I can't figure out a way to fix that diff --git a/configure.ac b/configure.ac index 6ad5421..fecff51 100644 --- a/configure.ac +++ b/configure.ac @@ -80,6 +80,15 @@ AS_IF([test "x$enable_extlog" = "xyes"], [ ]) AM_CONDITIONAL([WITH_EXTLOG], [test x$enable_extlog = xyes]) +AC_ARG_ENABLE([devlink], + AS_HELP_STRING([--enable-devlink], [enable devlink health events (currently experimental)])) + +AS_IF([test "x$enable_devlink" = "xyes"], [ + AC_DEFINE(HAVE_DEVLINK,1,"have devlink health events collect") + 
AC_SUBST([WITH_DEVLINK])
+])
+AM_CONDITIONAL([WITH_DEVLINK], [test x$enable_devlink = xyes])
+
 AC_ARG_ENABLE([abrt_report],
     AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)]))
 
@@ -127,4 +136,5 @@ compile time options summary
     ABRT report         : $enable_abrt_report
     HIP07 SAS HW errors : $enable_hisi_ns_decode
     ARM events          : $enable_arm
+    DEVLINK             : $enable_devlink
 EOF
diff --git a/ras-devlink-handler.c b/ras-devlink-handler.c
new file mode 100644
index 0000000..0fe46d2
--- /dev/null
+++ b/ras-devlink-handler.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2019 Cong Wang
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "libtrace/kbuffer.h"
+#include "ras-devlink-handler.h"
+#include "ras-record.h"
+#include "ras-logger.h"
+#include "ras-report.h"
+
+/*
+ * Handler for the devlink:devlink_health_report tracepoint.
+ *
+ * @s:       trace_seq that receives the printable form of the event
+ * @record:  raw trace record; record->ts is used when the uptime clock
+ *           is available
+ * @event:   event format used to look up the fields inside @record
+ * @context: struct ras_events of the running daemon
+ *
+ * Reads bus_name, dev_name, driver_name, reporter_name and msg from the
+ * record and hands them to the SQLite store (HAVE_SQLITE3) and to the ABRT
+ * reporter (HAVE_ABRT_REPORT).
+ *
+ * Returns 0 on success, -1 if any of the fields cannot be read.
+ */
+int ras_devlink_event_handler(struct trace_seq *s,
+			      struct pevent_record *record,
+			      struct event_format *event, void *context)
+{
+	int len;
+	struct ras_events *ras = context;
+	time_t now;
+	struct tm *tm;
+	struct devlink_event ev;
+
+	/*
+	 * Start with an empty timestamp: strftime() below only runs when
+	 * localtime() succeeds, but the buffer is always printed and stored.
+	 */
+	ev.timestamp[0] = '\0';
+
+	/*
+	 * Newer kernels (3.10-rc1 or upper) provide an uptime clock.
+	 * On previous kernels, the way to properly generate an event would
+	 * be to inject a fake one, measure its timestamp and diff it against
+	 * gettimeofday. We won't do it here. Instead, let's use uptime,
+	 * falling back to the event report's time, if "uptime" clock is
+	 * not available (legacy kernels).
+	 */
+
+	if (ras->use_uptime)
+		now = record->ts/user_hz + ras->uptime_diff;
+	else
+		now = time(NULL);
+
+	tm = localtime(&now);
+	if (tm)
+		strftime(ev.timestamp, sizeof(ev.timestamp),
+			 "%Y-%m-%d %H:%M:%S %z", tm);
+	trace_seq_printf(s, "%s ", ev.timestamp);
+
+	ev.bus_name = pevent_get_field_raw(s, event, "bus_name",
+					   record, &len, 1);
+	if (!ev.bus_name)
+		return -1;
+
+	ev.dev_name = pevent_get_field_raw(s, event, "dev_name",
+					   record, &len, 1);
+	if (!ev.dev_name)
+		return -1;
+
+	ev.driver_name = pevent_get_field_raw(s, event, "driver_name",
+					   record, &len, 1);
+	if (!ev.driver_name)
+		return -1;
+
+	ev.reporter_name = pevent_get_field_raw(s, event, "reporter_name",
+					   record, &len, 1);
+	if (!ev.reporter_name)
+		return -1;
+
+	ev.msg = pevent_get_field_raw(s, event, "msg", record, &len, 1);
+	if (!ev.msg)
+		return -1;
+
+	/* Insert data into the database */
+#ifdef HAVE_SQLITE3
+	ras_store_devlink_event(ras, &ev);
+#endif
+
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_devlink_event(ras, &ev);
+#endif
+
+	return 0;
+}
diff --git a/ras-devlink-handler.h b/ras-devlink-handler.h
new file mode 100644
index 0000000..29b64f7
--- /dev/null
+++ b/ras-devlink-handler.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2019 Cong Wang
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+*/
+
+#ifndef __RAS_DEVLINK_HANDLER_H
+#define __RAS_DEVLINK_HANDLER_H
+
+#include "ras-events.h"
+#include "libtrace/event-parse.h"
+
+/*
+ * Trace-event callback for devlink:devlink_health_report; context must be
+ * the daemon's struct ras_events. Returns 0 on success, -1 on a bad record.
+ */
+int ras_devlink_event_handler(struct trace_seq *s,
+			      struct pevent_record *record,
+			      struct event_format *event, void *context);
+
+#endif
diff --git a/ras-events.c b/ras-events.c
index 9395f6f..a75f979 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -33,6 +33,7 @@
 #include "ras-arm-handler.h"
 #include "ras-mce-handler.h"
 #include "ras-extlog-handler.h"
+#include "ras-devlink-handler.h"
 #include "ras-record.h"
 #include "ras-logger.h"
 
@@ -218,6 +219,10 @@ int toggle_ras_mc_event(int enable)
 	rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable);
 #endif
 
+#ifdef HAVE_DEVLINK
+	rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable);
+#endif
+
 free_ras:
 	free(ras);
 	return rc;
@@ -736,6 +741,17 @@ int handle_ras_events(int record_events)
 		    "ras", "aer_event");
 #endif
 
+#ifdef HAVE_DEVLINK
+	rc = add_event_handler(ras, pevent, page_size, "devlink",
+			       "devlink_health_report",
+			       ras_devlink_event_handler);
+	if (!rc)
+		num_events++;
+	else
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+		    "devlink", "devlink_health_report");
+#endif
+
 	if (!num_events) {
 		log(ALL, LOG_INFO,
 		    "Failed to trace all supported RAS events. 
Aborting.\n");
diff --git a/ras-record.c b/ras-record.c
index 2e7525e..4c8b55b 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -404,6 +404,70 @@ int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev)
 }
 #endif
 
+/*
+ * Table and functions to handle devlink:devlink_health_report
+ */
+
+#ifdef HAVE_DEVLINK
+static const struct db_fields devlink_event_fields[] = {
+		{ .name="id",			.type="INTEGER PRIMARY KEY" },
+		{ .name="timestamp",		.type="TEXT" },
+		{ .name="bus_name",		.type="TEXT" },
+		{ .name="dev_name",		.type="TEXT" },
+		{ .name="driver_name",		.type="TEXT" },
+		{ .name="reporter_name",	.type="TEXT" },
+		{ .name="msg",			.type="TEXT" },
+};
+
+static const struct db_table_descriptor devlink_event_tab = {
+	.name = "devlink_event",
+	.fields = devlink_event_fields,
+	.num_fields = ARRAY_SIZE(devlink_event_fields),
+};
+
+/*
+ * Store one devlink health report in the devlink_event table.
+ *
+ * The strings of @ev are bound with a NULL destructor (SQLITE_STATIC), so
+ * they are not copied and must stay valid until sqlite3_step() has run,
+ * which happens before this function returns.
+ *
+ * Returns 0 when there is no database or prepared statement, otherwise the
+ * result of sqlite3_reset().
+ */
+int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev)
+{
+	int rc;
+	int step_rc;
+	struct sqlite3_priv *priv = ras->db_priv;
+
+	if (!priv || !priv->stmt_devlink_event)
+		return 0;
+	log(TERM, LOG_INFO, "devlink_event store: %p\n", priv->stmt_devlink_event);
+
+	sqlite3_bind_text(priv->stmt_devlink_event,  1, ev->timestamp, -1, NULL);
+	sqlite3_bind_text(priv->stmt_devlink_event,  2, ev->bus_name, -1, NULL);
+	sqlite3_bind_text(priv->stmt_devlink_event,  3, ev->dev_name, -1, NULL);
+	sqlite3_bind_text(priv->stmt_devlink_event,  4, ev->driver_name, -1, NULL);
+	sqlite3_bind_text(priv->stmt_devlink_event,  5, ev->reporter_name, -1, NULL);
+	sqlite3_bind_text(priv->stmt_devlink_event,  6, ev->msg, -1, NULL);
+
+	step_rc = sqlite3_step(priv->stmt_devlink_event);
+	if (step_rc != SQLITE_OK && step_rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed to do devlink_event step on sqlite: error = %d\n", step_rc);
+	rc = sqlite3_reset(priv->stmt_devlink_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed reset devlink_event on sqlite: error = %d\n",
+		    rc);
+	/* Do not claim success when the insert itself failed. */
+	if (step_rc == SQLITE_OK || step_rc == SQLITE_DONE)
+		log(TERM, LOG_INFO, "register inserted at db\n");
+
+	return rc;
+}
+#endif
 
 /*
  * Generic code
@@ -567,6 +631,12 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
 		rc = ras_mc_prepare_stmt(priv, &priv->stmt_arm_record,
 					 &arm_event_tab);
 #endif
+#ifdef HAVE_DEVLINK
+	rc = ras_mc_create_table(priv, &devlink_event_tab);
+	if (rc == SQLITE_OK)
+		rc = ras_mc_prepare_stmt(priv, &priv->stmt_devlink_event,
+					 &devlink_event_tab);
+#endif
 
 	ras->db_priv = priv;
 	return 0;
diff --git a/ras-record.h b/ras-record.h
index a11f290..f230ed2 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -75,12 +75,26 @@ struct ras_arm_event {
 	int32_t psci_state;
 };
 
+/*
+ * One devlink:devlink_health_report event. The string members are borrowed
+ * pointers (filled from pevent_get_field_raw()); the struct does not own them.
+ */
+struct devlink_event {
+	char timestamp[64];
+	const char *bus_name;
+	const char *dev_name;
+	const char *driver_name;
+	const char *reporter_name;
+	const char *msg;
+};
+
 struct ras_mc_event;
 struct ras_aer_event;
 struct ras_extlog_event;
 struct ras_non_standard_event;
 struct ras_arm_event;
 struct mce_event;
+struct devlink_event;
 
 #ifdef HAVE_SQLITE3
 
@@ -104,6 +118,9 @@ struct sqlite3_priv {
 #ifdef HAVE_ARM
 	sqlite3_stmt	*stmt_arm_record;
 #endif
+#ifdef HAVE_DEVLINK
+	sqlite3_stmt	*stmt_devlink_event;
+#endif
 };
 
 int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras);
@@ -113,6 +130,7 @@ int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev);
 int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev);
 int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev);
 int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev);
+int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev);
 
 #else
 static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
@@ -122,6 +140,7 @@ static inline int ras_store_mce_record(struct ras_events *ras, struct mce_event
 static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; };
 static inline int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event 
*ev) { return 0; };
 static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; };
+static inline int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; };
 
 #endif
 
diff --git a/ras-report.c b/ras-report.c
index cb0a9e8..785a302 100644
--- a/ras-report.c
+++ b/ras-report.c
@@ -256,6 +256,40 @@ static int set_arm_event_backtrace(char *buf, struct ras_arm_event *ev){
 	return 0;
 }
 
+/*
+ * Build the ABRT "BACKTRACE=" item for a devlink event and append it to
+ * @buf with strcat().
+ *
+ * snprintf() bounds the text to bt_buf, so an over-long field (msg is
+ * free-form) is truncated instead of overflowing the stack buffer.
+ *
+ * Returns 0 on success, -1 if @buf or @ev is NULL.
+ */
+static int set_devlink_event_backtrace(char *buf, struct devlink_event *ev){
+	char bt_buf[MAX_BACKTRACE_SIZE];
+
+	if(!buf || !ev)
+		return -1;
+
+	snprintf(bt_buf, sizeof(bt_buf), "BACKTRACE="	\
+						"timestamp=%s\n"	\
+						"bus_name=%s\n"		\
+						"dev_name=%s\n"		\
+						"driver_name=%s\n"	\
+						"reporter_name=%s\n"	\
+						"msg=%s\n",		\
+						ev->timestamp,		\
+						ev->bus_name,		\
+						ev->dev_name,		\
+						ev->driver_name,	\
+						ev->reporter_name,	\
+						ev->msg);
+
+	strcat(buf, bt_buf);
+
+	return 0;
+}
+
 static int commit_report_backtrace(int sockfd, int type, void *ev){
 	char buf[MAX_BACKTRACE_SIZE];
 	char *pbuf = buf;
@@ -284,6 +318,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){
 	case ARM_EVENT:
 		rc = set_arm_event_backtrace(buf, (struct ras_arm_event *)ev);
 		break;
+	case DEVLINK_EVENT:
+		rc = set_devlink_event_backtrace(buf, (struct devlink_event *)ev);
+		break;
 	default:
 		return -1;
 	}
@@ -550,3 +587,67 @@ mce_fail:
 		return -1;
 	}
 }
+
+/*
+ * Report a devlink health event to ABRT through the report socket.
+ *
+ * After commit_report_basic() and commit_report_backtrace(), the ANALYZER
+ * and REASON items are written, each with its terminating NUL byte.
+ *
+ * Returns 0 when every step succeeded, -1 otherwise.
+ */
+int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev){
+	char buf[MAX_MESSAGE_SIZE];
+	int sockfd = 0;
+	int done = 0;
+	int rc = -1;
+
+	memset(buf, 0, sizeof(buf));
+
+	sockfd = setup_report_socket();
+	if(sockfd < 0){
+		return -1;
+	}
+
+	rc = commit_report_basic(sockfd);
+	if(rc < 0){
+		goto devlink_fail;
+	}
+
+	rc = commit_report_backtrace(sockfd, DEVLINK_EVENT, ev);
+	if(rc < 0){
+		goto devlink_fail;
+	}
+
+	/*
+	 * write() failures are tested explicitly: in "rc < strlen(buf) + 1"
+	 * a negative rc would be converted to a huge unsigned value and the
+	 * error would go unnoticed.
+	 */
+	sprintf(buf, "ANALYZER=%s", "rasdaemon-devlink");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if(rc < 0 || (size_t)rc < strlen(buf) + 1){
+		goto devlink_fail;
+	}
+
+	sprintf(buf, "REASON=%s", "devlink health report problem");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if(rc < 0 || (size_t)rc < strlen(buf) + 1){
+		goto devlink_fail;
+	}
+
+	done = 1;
+
+devlink_fail:
+
+	/* sockfd is known to be valid here, and 0 is a valid descriptor */
+	if(sockfd >= 0){
+		close(sockfd);
+	}
+
+	if(done){
+		return 0;
+	}else{
+		return -1;
+	}
+}
diff --git a/ras-report.h b/ras-report.h
index 6c466f5..cb133a1 100644
--- a/ras-report.h
+++ b/ras-report.h
@@ -34,7 +34,8 @@ enum {
 	MCE_EVENT,
 	AER_EVENT,
 	NON_STANDARD_EVENT,
-	ARM_EVENT
+	ARM_EVENT,
+	DEVLINK_EVENT
 };
 
 #ifdef HAVE_ABRT_REPORT
@@ -44,6 +45,7 @@ int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev);
 int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev);
 int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev);
 int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev);
+int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev);
 
 #else
 
@@ -52,6 +54,7 @@ static inline int ras_report_aer_event(struct ras_events *ras, struct ras_aer_ev
 static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; };
 static inline int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; };
 static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; };
+static inline int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; };
 
 #endif
 
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index aee431a..7342683 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1175,6 +1175,22 @@ sub summary
     }
     $query_handle->finish;
 
+    # devlink errors
+    $query = "select dev_name, count(*) from devlink_event group by dev_name";
+    $query_handle = $dbh->prepare($query);
+    $query_handle->execute();
+    $query_handle->bind_columns(\($dev_name, $count));
+    $out = "";
+    while($query_handle->fetch()) {
+        $out .= "\t$dev_name has $count errors\n";
+    }
+    if ($out ne "") {
+        print "Devlink records summary:\n$out";
+    } else {
+        print "No devlink errors.\n";
+    }
+    $query_handle->finish;
+
     # MCE mce_record errors
     $query = "select error_msg, count(*) from mce_record group by error_msg";
     $query_handle = $dbh->prepare($query);
@@ -1264,6 +1280,28 @@ sub errors
     }
     $query_handle->finish;
 
+    # devlink errors
+    $query = "select id, timestamp, bus_name, dev_name, driver_name, reporter_name, msg from devlink_event order by id";
+    $query_handle = $dbh->prepare($query);
+    $query_handle->execute();
+    $query_handle->bind_columns(\($id, $timestamp, $bus_name, $dev_name, $driver_name, $reporter_name, $msg));
+    $out = "";
+    while($query_handle->fetch()) {
+        $out .= "$id $timestamp error: ";
+        $out .= "bus_name=$bus_name, ";
+        $out .= "dev_name=$dev_name, ";
+        $out .= "driver_name=$driver_name, ";
+        $out .= "reporter_name=$reporter_name, ";
+        $out .= "message='$msg', ";
+        $out .= "\n";
+    }
+    if ($out ne "") {
+        print "Devlink events:\n$out\n";
+    } else {
+        print "No devlink errors.\n\n";
+    }
+    $query_handle->finish;
+
     # MCE mce_record errors
     $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id";
     $query_handle = $dbh->prepare($query);
-- 
2.50.1