]> www.infradead.org Git - users/mchehab/rasdaemon.git/commitdiff
Add support for PCIe AER events
authorMauro Carvalho Chehab <mchehab@redhat.com>
Wed, 15 May 2013 11:07:08 +0000 (08:07 -0300)
committerMauro Carvalho Chehab <mchehab@redhat.com>
Wed, 15 May 2013 11:30:19 +0000 (08:30 -0300)
The code is currently untested, as I'm missing a testing
system where I could inject PCIe AER events.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Makefile.am
TODO
configure.ac
ras-aer-handler.c [new file with mode: 0644]
ras-aer-handler.h [new file with mode: 0644]
ras-events.c
ras-events.h
ras-record.h

index 15ea39cfafa5e15f0c6a7d34a1c036fcf5e490c7..916d93e77f6717061357147aeaf31fbab9168383 100644 (file)
@@ -7,6 +7,9 @@ rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c
 if WITH_SQLITE3
    rasdaemon_SOURCES += ras-record.c
 endif
+if WITH_AER
+   rasdaemon_SOURCES += ras-aer-handler.c
+endif
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
 
 include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h  ras-record.h
diff --git a/TODO b/TODO
index b0321957c5312e90c245bb8344d85e9e161e217d..35c94048941f2e8bbed4ec2ac60e107ed8e5d26b 100644 (file)
--- a/TODO
+++ b/TODO
@@ -10,6 +10,6 @@
    the errors are just random noise or if they mean that memory is
    damaged.
 
-8) add support for PCIe AER trace records.
+8) test support for PCIe AER trace records.
 
 9) add support for mce trace records.
index 727957e9fe1413319231a5efd11cd397e272e5bd..c567367d82c1fea5a8590c7ae5b2b635b84ed99e 100644 (file)
@@ -21,7 +21,7 @@ AC_CONFIG_FILES([
 ])
 
 AC_ARG_ENABLE([sqlite3],
-    AS_HELP_STRING([--enable-sqlite], [enable storing data at SQL lite database (currently experimental]))
+    AS_HELP_STRING([--enable-sqlite], [enable storing data at SQL lite database (currently experimental)]))
 
 AS_IF([test "x$enable_sqlite3" = "xyes"], [
   AC_CHECK_LIB(sqlite3, sqlite3_open,[echo "found sqlite3"] , AC_MSG_ERROR([*** Unable to find sqlite3 library]), )
@@ -33,6 +33,17 @@ AS_IF([test "x$enable_sqlite3" = "xyes"], [
 AM_CONDITIONAL([WITH_SQLITE3], [test x$enable_sqlite3 = xyes])
 AC_SUBST([SQLITE3_LIBS])
 
+
+AC_ARG_ENABLE([aer],
+    AS_HELP_STRING([--enable-aer], [enable PCIe AER events (currently experimental)]))
+
+AS_IF([test "x$enable_aer" = "xyes"], [
+  AC_DEFINE(HAVE_AER,1,"have PCIe AER events collect")
+  AC_SUBST([WITH_AER])
+])
+
+AM_CONDITIONAL([WITH_AER], [test x$enable_aer = xyes])
+
 test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
 
 AC_OUTPUT
diff --git a/ras-aer-handler.c b/ras-aer-handler.c
new file mode 100644 (file)
index 0000000..0543bb6
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2013 Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "libtrace/kbuffer.h"
+#include "ras-aer-handler.h"
+#include "ras-record.h"
+#include "ras-logger.h"
+
+/*
+ * Names of the PCIe AER status bits, indexed by bit number.
+ *
+ * The same table is used for the correctable and the uncorrectable status
+ * words. Bits 12 and 13 are assigned in both of them, and a designated
+ * initializer that repeats an index silently replaces the earlier one, so
+ * those two entries carry both names instead of dropping the correctable
+ * ones. Bits without a name are left NULL.
+ */
+static const char * const aer_errors[32] = {
+       /* Bits used only by correctable errors */
+       [0]  = "Receiver Error",
+       [6]  = "Bad TLP",
+       [7]  = "Bad DLLP",
+       [8]  = "REPLAY_NUM Rollover",
+
+       /* Bits shared by both status words: correctable / uncorrectable */
+       [12] = "Replay Timer Timeout / Poisoned TLP",
+       [13] = "Advisory Non-Fatal / Flow Control Protocol",
+
+       /* Bits used only by uncorrectable errors */
+       [4]  = "Data Link Protocol",
+       [14] = "Completion Timeout",
+       [15] = "Completer Abort",
+       [16] = "Unexpected Completion",
+       [17] = "Receiver Overflow",
+       [18] = "Malformed TLP",
+       [19] = "ECRC",
+       [20] = "Unsupported Request",
+};
+
+/*
+ * Render the bits set in an AER status word as a ", " separated list.
+ *
+ * @buf:    destination buffer
+ * @len:    size of @buf in bytes, including room for the terminating NUL
+ * @status: status word to decode; named bits use aer_errors[], the others
+ *          are printed as BIT(n)
+ *
+ * Returns @buf. The result is always NUL-terminated when @len is not zero;
+ * if the list does not fit, it is cut at the end of the buffer and decoding
+ * stops. Nothing is written when @buf is NULL or @len is zero.
+ */
+char *aer_status_msg(char *buf, size_t len, unsigned int status)
+{
+       size_t used = 0;
+       int i, n;
+
+       if (!buf || !len)
+               return buf;
+
+       buf[0] = '\0';
+
+       for (i = 0; i < 32; i++) {
+               /* unsigned constant: shifting a signed 1 into bit 31 overflows */
+               if (!(status & (1U << i)))
+                       continue;
+
+               if (aer_errors[i])
+                       n = snprintf(buf + used, len - used, "%s%s",
+                                    used ? ", " : "", aer_errors[i]);
+               else
+                       n = snprintf(buf + used, len - used, "%sBIT(%d)",
+                                    used ? ", " : "", i);
+
+               if (n < 0) {
+                       /* output error: drop whatever this entry wrote */
+                       buf[used] = '\0';
+                       break;
+               }
+
+               /*
+                * snprintf() reports the length it wanted to write. When that
+                * does not fit, the buffer is full (and already terminated),
+                * so stop instead of advancing past its end.
+                */
+               if ((size_t)n >= len - used)
+                       break;
+
+               used += (size_t)n;
+       }
+
+       return buf;
+}
+
+/*
+ * Handler for the "aer_event" trace event.
+ *
+ * Appends to @s one line made of the event time, the device name, the
+ * decoded status bits and the severity text.
+ *
+ * @s:       sequence the text is appended to
+ * @record:  trace record being decoded
+ * @event:   format description used to look the fields up
+ * @context: the struct ras_events the handler was registered with
+ *
+ * Returns 0 on success, or -1 when the "dev_name", "status" or "severity"
+ * field cannot be read from the record.
+ */
+int ras_aer_event_handler(struct trace_seq *s,
+                        struct pevent_record *record,
+                        struct event_format *event, void *context)
+{
+       int len;
+       unsigned long long val;
+       struct ras_events *ras = context;
+       time_t now;
+       struct tm *tm;
+       struct ras_aer_event ev;
+       char buf[1024];
+
+       /* No field of ev may be read before it is set, whatever path is taken */
+       memset(&ev, 0, sizeof(ev));
+
+       /*
+        * Newer kernels (3.10-rc1 or upper) provide an uptime clock.
+        * On previous kernels, the way to properly generate an event would
+        * be to inject a fake one, measure its timestamp and diff it against
+        * gettimeofday. We won't do it here. Instead, let's use uptime,
+        * falling-back to the event report's time, if "uptime" clock is
+        * not available (legacy kernels).
+        */
+
+       if (ras->use_uptime)
+               now = record->ts/1000000000L + ras->uptime_diff;
+       else
+               now = time(NULL);
+
+       /*
+        * localtime() can fail, and strftime() leaves the buffer contents
+        * unspecified when it returns 0: print an empty timestamp then.
+        */
+       tm = localtime(&now);
+       if (!tm || !strftime(ev.timestamp, sizeof(ev.timestamp),
+                            "%Y-%m-%d %H:%M:%S %z", tm))
+               ev.timestamp[0] = '\0';
+       trace_seq_printf(s, "%s ", ev.timestamp);
+
+       ev.dev_name = pevent_get_field_raw(s, event, "dev_name",
+                                          record, &len, 1);
+       if (!ev.dev_name)
+               return -1;
+       /* Say which device the event is about; never read past the field */
+       if (len > 0)
+               trace_seq_printf(s, "%.*s ", len, ev.dev_name);
+
+       if (pevent_get_field_val(s,  event, "status", record, &val, 1) < 0)
+               return -1;
+
+       /* Fills the error buffer */
+       ev.msg = aer_status_msg(buf, sizeof(buf), (unsigned int)val);
+       trace_seq_printf(s, "%s ", ev.msg);
+
+       if (pevent_get_field_val(s, event, "severity", record, &val, 1) < 0)
+               return -1;
+       /*
+        * NOTE(review): none of the headers included by this file visibly
+        * declares mc_event_error_type(); confirm that a prototype is in
+        * scope, otherwise the returned pointer is handled as an int.
+        */
+       ev.error_type = mc_event_error_type(val);
+       trace_seq_puts(s, ev.error_type);
+
+       /* TODO: store the event in the database (ras_store_aer_event() is not wired up yet) */
+
+       return 0;
+}
diff --git a/ras-aer-handler.h b/ras-aer-handler.h
new file mode 100644 (file)
index 0000000..5e9c198
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2013 Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef __RAS_AER_HANDLER_H
+#define __RAS_AER_HANDLER_H
+
+#include "ras-events.h"
+#include "libtrace/event-parse.h"
+
+int ras_aer_event_handler(struct trace_seq *s,                 /* s: sequence the decoded event text is appended to */
+                        struct pevent_record *record,          /* record: trace record being decoded */
+                        struct event_format *event, void *context);    /* context: the struct ras_events * passed at registration; returns 0, or -1 when a field cannot be read */
+
+#endif
index 8a45f76da7371420c3cebcc15819837e84c17efe..7f9be8848d7f0053aa44126ae519d0ef06efc46c 100644 (file)
@@ -28,6 +28,7 @@
 #include "libtrace/kbuffer.h"
 #include "libtrace/event-parse.h"
 #include "ras-mc-handler.h"
+#include "ras-aer-handler.h"
 #include "ras-record.h"
 #include "ras-logger.h"
 
@@ -481,9 +482,15 @@ int handle_ras_events(int record_events)
        ras->page_size = page_size;
         ras->record_events = record_events;
 
+       /* Registers the special event handlers */
        pevent_register_event_handler(pevent, -1, "ras", "mc_event",
                                      ras_mc_event_handler, ras);
 
+#ifdef HAVE_AER
+       pevent_register_event_handler(pevent, -1, "ras", "aer_event",
+                                     ras_aer_event_handler, ras);
+#endif
+
        rc = pevent_parse_event(pevent, page, size, "ras");
        free(page);
        if (rc)
index 6b081ed6716e465586b656db0a66edd44e0fc969..a65e0aa3c33bec8cd77e40f2413b534c6ad56b78 100644 (file)
 
 #include "ras-record.h"
 
-#include <pthread.h>
-#include <time.h>
-
 #ifndef __RAS_EVENTS_H
 #define __RAS_EVENTS_H
 
+#include <pthread.h>
+#include <time.h>
+
 #define MAX_PATH 1024
 #define STR(x) #x
 
index 7270853ee46c6cd3d1e3a56e419e4ef44f864000..20c327f084b46a2c7e044e6404430a31f7fe6396 100644 (file)
@@ -33,6 +33,13 @@ struct ras_mc_event {
        const char *driver_detail;
 };
 
+struct ras_aer_event {                 /* one decoded PCIe AER trace event */
+       char timestamp[64];             /* event time, formatted by the AER handler as "%Y-%m-%d %H:%M:%S %z" */
+       const char *error_type;         /* severity text, as returned by mc_event_error_type() */
+       const char *dev_name;           /* raw "dev_name" field of the trace record */
+       const char *msg;                /* decoded status bits, as built by aer_status_msg() */
+};
+
 #ifdef HAVE_SQLITE3
 
 #include <sqlite3.h>