From: Mauro Carvalho Chehab Date: Wed, 15 May 2013 11:07:08 +0000 (-0300) Subject: Add support for PCIe AER events X-Git-Tag: v0.3.0~30 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=e3e59d396e5044fd63f298d570e37a661ff9c5e1;p=users%2Fmchehab%2Frasdaemon.git Add support for PCIe AER events The code is currently untested, as I'm missing a testing system where I could inject PCIe AER events. Signed-off-by: Mauro Carvalho Chehab --- diff --git a/Makefile.am b/Makefile.am index 15ea39c..916d93e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -7,6 +7,9 @@ rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c if WITH_SQLITE3 rasdaemon_SOURCES += ras-record.c endif +if WITH_AER + rasdaemon_SOURCES += ras-aer-handler.c +endif rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h ras-record.h diff --git a/TODO b/TODO index b032195..35c9404 100644 --- a/TODO +++ b/TODO @@ -10,6 +10,6 @@ the errors are just random noise or if they mean that memory is damaged. -8) add support for PCIe AER trace records. +8) test support for PCIe AER trace records. 9) add support for mce trace records. 
diff --git a/configure.ac b/configure.ac index 727957e..c567367 100644 --- a/configure.ac +++ b/configure.ac @@ -21,7 +21,7 @@ AC_CONFIG_FILES([ ]) AC_ARG_ENABLE([sqlite3], - AS_HELP_STRING([--enable-sqlite], [enable storing data at SQL lite database (currently experimental])) + AS_HELP_STRING([--enable-sqlite], [enable storing data at SQL lite database (currently experimental)])) AS_IF([test "x$enable_sqlite3" = "xyes"], [ AC_CHECK_LIB(sqlite3, sqlite3_open,[echo "found sqlite3"] , AC_MSG_ERROR([*** Unable to find sqlite3 library]), ) @@ -33,6 +33,17 @@ AS_IF([test "x$enable_sqlite3" = "xyes"], [ AM_CONDITIONAL([WITH_SQLITE3], [test x$enable_sqlite3 = xyes]) AC_SUBST([SQLITE3_LIBS]) + +AC_ARG_ENABLE([aer], + AS_HELP_STRING([--enable-aer], [enable PCIe AER events (currently experimental)])) + +AS_IF([test "x$enable_aer" = "xyes"], [ + AC_DEFINE(HAVE_AER,1,"have PCIe AER events collect") + AC_SUBST([WITH_AER]) +]) + +AM_CONDITIONAL([WITH_AER], [test x$enable_aer = xyes]) + test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc AC_OUTPUT diff --git a/ras-aer-handler.c b/ras-aer-handler.c new file mode 100644 index 0000000..0543bb6 --- /dev/null +++ b/ras-aer-handler.c @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2013 Mauro Carvalho Chehab + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include +#include +#include +#include +#include "libtrace/kbuffer.h" +#include "ras-aer-handler.h" +#include "ras-record.h" +#include "ras-logger.h" + +static const char *aer_errors[32] = { + /* Correctable errors */ + [0] = "Receiver Error", + [6] = "Bad TLP", + [7] = "Bad DLLP", + [8] = "RELAY_NUM Rollover", + [12] = "Replay Timer Timeout", + [13] = "Advisory Non-Fatal", + + /* Uncorrectable errors */ + [4] = "Data Link Protocol", + [12] = "Poisoned TLP", + [13] = "Flow Control Protocol", + [14] = "Completion Timeout", + [15] = "Completer Abort", + [16] = "Unexpected Completion", + [17] = "Receiver Overflow", + [18] = "Malformed TLP", + [19] = "ECRC", + [20] = "Unsupported Request", +}; + +char *aer_status_msg(char *buf, size_t len, unsigned int status) +{ + int i, n; + char *p = buf; + + len--; + + for (i = 0; i < 32; i++) { + if (status & (1 << i)) { + if (p != buf) { + n = snprintf(p, len, ", "); + len -= n; + p += n; + } + if (!aer_errors[i]) + n = snprintf(p, len, "BIT(%d)", i); + else + n = snprintf(p, len, "%s", aer_errors[i]); + len -= n; + p += n; + } + } + + *p = 0; + return buf; +} + +int ras_aer_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + int len; + unsigned long long val; + struct ras_events *ras = context; + time_t now; + struct tm *tm; + struct ras_aer_event ev; + char buf[1024]; + + /* + * Newer kernels (3.10-rc1 or later) provide an uptime clock. + * On previous kernels, the way to properly generate an event would + * be to inject a fake one, measure its timestamp and diff it against + * gettimeofday. We won't do it here. Instead, let's use uptime, + * falling back to the time the event is reported (time(NULL)) if the + * "uptime" clock is not available (legacy kernels). 
+ */ + + if (ras->use_uptime) + now = record->ts/1000000000L + ras->uptime_diff; + else + now = time(NULL); + + tm = localtime(&now); + if (tm) + strftime(ev.timestamp, sizeof(ev.timestamp), + "%Y-%m-%d %H:%M:%S %z", tm); + trace_seq_printf(s, "%s ", ev.timestamp); + + ev.dev_name = pevent_get_field_raw(s, event, "dev_name", + record, &len, 1); + if (!ev.dev_name) + return -1; + + if (pevent_get_field_val(s, event, "status", record, &val, 1) < 0) + return -1; + + /* Fill the error buffer */ + ev.msg = aer_status_msg(buf, sizeof(buf), val); + trace_seq_printf(s, "%s ", ev.msg); + + if (pevent_get_field_val(s, event, "severity", record, &val, 1) < 0) + return -1; + ev.error_type = mc_event_error_type(val); + trace_seq_puts(s, ev.error_type); + + /* Insert data into the database */ +// ras_store_aer_event(ras, &ev); + + return 0; +} diff --git a/ras-aer-handler.h b/ras-aer-handler.h new file mode 100644 index 0000000..5e9c198 --- /dev/null +++ b/ras-aer-handler.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2013 Mauro Carvalho Chehab + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifndef __RAS_AER_HANDLER_H +#define __RAS_AER_HANDLER_H + +#include "ras-events.h" +#include "libtrace/event-parse.h" + +int ras_aer_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context); + +#endif diff --git a/ras-events.c b/ras-events.c index 8a45f76..7f9be88 100644 --- a/ras-events.c +++ b/ras-events.c @@ -28,6 +28,7 @@ #include "libtrace/kbuffer.h" #include "libtrace/event-parse.h" #include "ras-mc-handler.h" +#include "ras-aer-handler.h" #include "ras-record.h" #include "ras-logger.h" @@ -481,9 +482,15 @@ int handle_ras_events(int record_events) ras->page_size = page_size; ras->record_events = record_events; + /* Registers the special event handlers */ pevent_register_event_handler(pevent, -1, "ras", "mc_event", ras_mc_event_handler, ras); +#ifdef HAVE_AER + pevent_register_event_handler(pevent, -1, "ras", "aer_event", + ras_aer_event_handler, ras); +#endif + rc = pevent_parse_event(pevent, page, size, "ras"); free(page); if (rc) diff --git a/ras-events.h b/ras-events.h index 6b081ed..a65e0aa 100644 --- a/ras-events.h +++ b/ras-events.h @@ -18,12 +18,12 @@ #include "ras-record.h" -#include -#include - #ifndef __RAS_EVENTS_H #define __RAS_EVENTS_H +#include +#include + #define MAX_PATH 1024 #define STR(x) #x diff --git a/ras-record.h b/ras-record.h index 7270853..20c327f 100644 --- a/ras-record.h +++ b/ras-record.h @@ -33,6 +33,13 @@ struct ras_mc_event { const char *driver_detail; }; +struct ras_aer_event { + char timestamp[64]; + const char *error_type; + const char *dev_name; + const char *msg; +}; + #ifdef HAVE_SQLITE3 #include