]> www.infradead.org Git - users/mchehab/rasdaemon.git/commitdiff
Add per-cpu-type handlers for MCE log
authorMauro Carvalho Chehab <mchehab@redhat.com>
Wed, 15 May 2013 19:34:49 +0000 (16:34 -0300)
committerMauro Carvalho Chehab <mchehab@redhat.com>
Wed, 15 May 2013 19:36:21 +0000 (16:36 -0300)
For now, only the bank information is handled.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Makefile.am
mce-amd-k8.c [new file with mode: 0644]
mce-intel.c [new file with mode: 0644]
ras-mce-handler.c
ras-mce-handler.h

index e70a51477ce5274c3388668f59e09ed40b140ddc..c8819071e59a842ae15d9f6edf1678a27e4f7d1c 100644 (file)
@@ -11,7 +11,7 @@ if WITH_AER
    rasdaemon_SOURCES += ras-aer-handler.c
 endif
 if WITH_MCE
-   rasdaemon_SOURCES += ras-mce-handler.c
+   rasdaemon_SOURCES += ras-mce-handler.c mce-intel.c mce-amd-k8.c
 endif
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
 
diff --git a/mce-amd-k8.c b/mce-amd-k8.c
new file mode 100644 (file)
index 0000000..6659b1f
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2013 Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * The code below was adapted from Andi Kleen/Intel/SuSe mcelog code,
+ * released under the GNU General Public License, v.2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <stdio.h>
+
+#include "ras-mce-handler.h"
+
+/*
+ * Software-defined bank numbers used for AMD K8 threshold counters.
+ *
+ * Banks in [K8_MCE_THRESHOLD_BASE, K8_MCE_THRESHOLD_TOP) are mapped by
+ * bank_name() onto k8threshold[], indexed by (bank - K8_MCE_THRESHOLD_BASE).
+ * The K8_MCELOG_THRESHOLD_* values are the indexes inside that table that
+ * have a specific description.
+ *
+ * NOTE(review): the "N * 9" terms presumably mean "bank N, 9 counters per
+ * bank" -- confirm against the mcelog sources this was adapted from.
+ */
+#define K8_MCE_THRESHOLD_BASE        (MCE_EXTENDED_BANK + 1)      /* MCE_AMD */
+#define K8_MCE_THRESHOLD_TOP         (K8_MCE_THRESHOLD_BASE + 6 * 9)
+#define K8_MCELOG_THRESHOLD_DRAM_ECC (4 * 9 + 0)
+#define K8_MCELOG_THRESHOLD_LINK     (4 * 9 + 1)
+#define K8_MCELOG_THRESHOLD_L3_CACHE (4 * 9 + 2)
+#define K8_MCELOG_THRESHOLD_FBDIMM   (4 * 9 + 3)
+
+/* Descriptions of the K8 banks, indexed by bank number */
+static char *k8bank[] = {
+       "data cache",
+       "instruction cache",
+       "bus unit",
+       "load/store unit",
+       "northbridge",
+       "fixed-issue reorder"
+};
+
+/*
+ * Descriptions of the K8 threshold counters, indexed by
+ * (bank - K8_MCE_THRESHOLD_BASE). Every slot without a specific
+ * description gets the same "Unknown threshold counter" text.
+ */
+static char *k8threshold[] = {
+       [0 ... K8_MCELOG_THRESHOLD_DRAM_ECC - 1] = "Unknown threshold counter",
+       [K8_MCELOG_THRESHOLD_DRAM_ECC] = "MC4_MISC0 DRAM threshold",
+       [K8_MCELOG_THRESHOLD_LINK] = "MC4_MISC1 Link threshold",
+       [K8_MCELOG_THRESHOLD_L3_CACHE] = "MC4_MISC2 L3 Cache threshold",
+       [K8_MCELOG_THRESHOLD_FBDIMM] = "MC4_MISC3 FBDIMM threshold",
+       [K8_MCELOG_THRESHOLD_FBDIMM + 1 ...
+        K8_MCE_THRESHOLD_TOP - K8_MCE_THRESHOLD_BASE - 1] =
+                "Unknown threshold counter",
+};
+
+/*
+ * Translate an MCE bank number into a human-readable description.
+ *
+ * Banks below ARRAY_SIZE(k8bank) are looked up in k8bank[]; banks in
+ * [K8_MCE_THRESHOLD_BASE, K8_MCE_THRESHOLD_TOP) are looked up in
+ * k8threshold[]. Both produce "<description> (bank=<decimal>)".
+ * Any other bank produces "bank=<hex>".
+ *
+ * Returns a pointer to a static buffer that the next call overwrites.
+ */
+static char *bank_name(unsigned bank)
+{
+       static char buf[64];
+       const char *name;
+
+       if (bank < ARRAY_SIZE(k8bank)) {
+               name = k8bank[bank];
+       } else if (bank >= K8_MCE_THRESHOLD_BASE &&
+                  bank < K8_MCE_THRESHOLD_TOP) {
+               name = k8threshold[bank - K8_MCE_THRESHOLD_BASE];
+       } else {
+               snprintf(buf, sizeof(buf), "bank=%x", bank);
+               return buf;
+       }
+
+       /* bank is unsigned, hence %u; snprintf always NUL-terminates buf */
+       snprintf(buf, sizeof(buf), "%s (bank=%u)", name, bank);
+       return buf;
+}
+
+/*
+ * Append a textual dump of an AMD K8 MCE event to the trace sequence.
+ *
+ * @ras: not used by this function
+ * @s:   trace sequence the text is appended to
+ * @e:   event to dump
+ *
+ * The bank is printed via bank_name(); all other fields are printed as
+ * ", <name>= <value> " pairs, using the same layout as dump_intel_event().
+ *
+ * NOTE(review): every field is printed with %d; confirm against the
+ * definition of struct mce_event that none of them is wider than int.
+ */
+void dump_amd_k8_event(struct ras_events *ras,
+                      struct trace_seq *s, struct mce_event *e)
+{
+       trace_seq_printf(s, "%s ", bank_name(e->bank));
+       /* Keep the ", " separator here, like the other dump functions do */
+       trace_seq_printf(s, ", mcgcap= %d ", e->mcgcap);
+       trace_seq_printf(s, ", mcgstatus= %d ", e->mcgstatus);
+       trace_seq_printf(s, ", status= %d ", e->status);
+       trace_seq_printf(s, ", addr= %d ", e->addr);
+       trace_seq_printf(s, ", misc= %d ", e->misc);
+       trace_seq_printf(s, ", ip= %d ", e->ip);
+       trace_seq_printf(s, ", tsc= %d ", e->tsc);
+       trace_seq_printf(s, ", walltime= %d ", e->walltime);
+       trace_seq_printf(s, ", cpu= %d ", e->cpu);
+       trace_seq_printf(s, ", cpuid= %d ", e->cpuid);
+       trace_seq_printf(s, ", apicid= %d ", e->apicid);
+       trace_seq_printf(s, ", socketid= %d ", e->socketid);
+       trace_seq_printf(s, ", cs= %d ", e->cs);
+       trace_seq_printf(s, ", cpuvendor= %d", e->cpuvendor);
+}
+
diff --git a/mce-intel.c b/mce-intel.c
new file mode 100644 (file)
index 0000000..6b90004
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013 Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * The code below was adapted from Andi Kleen/Intel/SuSe mcelog code,
+ * released under the GNU General Public License, v.2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <stdio.h>
+
+#include "ras-mce-handler.h"
+
+/* Software-defined bank numbers that bank_name() maps to fixed descriptions */
+#define MCE_THERMAL_BANK       (MCE_EXTENDED_BANK + 0)
+#define MCE_TIMEOUT_BANK        (MCE_EXTENDED_BANK + 90)
+
+/*
+ * Describe an Intel MCE bank.
+ *
+ * The thermal and timeout software-defined banks map to fixed strings;
+ * any other bank is rendered as "bank=<hex>" into a static buffer that
+ * the next call overwrites.
+ */
+static char *bank_name(unsigned bank)
+{
+       static char buf[64];
+
+       if (bank == MCE_THERMAL_BANK)
+               return "THERMAL EVENT";
+
+       if (bank == MCE_TIMEOUT_BANK)
+               return "Timeout waiting for exception on other CPUs";
+
+       sprintf(buf, "bank=%x", bank);
+       return buf;
+}
+
+/*
+ * Append a textual dump of an Intel MCE event to the trace sequence.
+ *
+ * @ras: not used by this function
+ * @s:   trace sequence the text is appended to
+ * @e:   event to dump
+ *
+ * The bank is printed via bank_name(); all other fields are printed as
+ * ", <name>= <value> " pairs.
+ *
+ * NOTE(review): every field is printed with %d; confirm against the
+ * definition of struct mce_event that none of them is wider than int.
+ */
+void dump_intel_event(struct ras_events *ras,
+                     struct trace_seq *s, struct mce_event *e)
+{
+       trace_seq_printf(s, "%s ",bank_name(e->bank));
+       trace_seq_printf(s, ", mcgcap= %d ", e->mcgcap);
+       trace_seq_printf(s, ", mcgstatus= %d ", e->mcgstatus);
+       trace_seq_printf(s, ", status= %d ", e->status);
+       trace_seq_printf(s, ", addr= %d ", e->addr);
+       trace_seq_printf(s, ", misc= %d ", e->misc);
+       trace_seq_printf(s, ", ip= %d ", e->ip);
+       trace_seq_printf(s, ", tsc= %d ", e->tsc);
+       trace_seq_printf(s, ", walltime= %d ", e->walltime);
+       trace_seq_printf(s, ", cpu= %d ", e->cpu);
+       trace_seq_printf(s, ", cpuid= %d ", e->cpuid);
+       trace_seq_printf(s, ", apicid= %d ", e->apicid);
+       trace_seq_printf(s, ", socketid= %d ", e->socketid);
+       trace_seq_printf(s, ", cs= %d ", e->cs);
+       trace_seq_printf(s, ", cpuvendor= %d", e->cpuvendor);
+}
+
index a0d3c3234fdc7f461a1ca7754542f6aaf6160244..79e24229b3646f3bbf3b403dbaa0cd61c4d47fad 100644 (file)
 #include "ras-record.h"
 #include "ras-logger.h"
 
+/*
+ * The code below was adapted from Andi Kleen/Intel/SuSe mcelog code,
+ * released under the GNU General Public License, v.2
+ */
 static char *cputype_name[] = {
        [CPU_GENERIC] = "generic CPU",
        [CPU_P6OLD] = "Intel PPro/P2/P3/old Xeon",
@@ -194,9 +198,14 @@ int register_mce_handler(struct ras_events *ras)
        return rc;
 }
 
+/*
+ * End of mcelog's code
+ */
+
 static void dump_mce_event(struct trace_seq *s, struct mce_event *e)
 {
-       trace_seq_printf(s, "mcgcap= %d ", e->mcgcap);
+       /* bank is a number: "%s" would dereference it as a string pointer */
+       trace_seq_printf(s, "bank=%d ", e->bank);
+       trace_seq_printf(s, ", mcgcap= %d ", e->mcgcap);
        trace_seq_printf(s, ", mcgstatus= %d ", e->mcgstatus);
        trace_seq_printf(s, ", status= %d ", e->status);
        trace_seq_printf(s, ", addr= %d ", e->addr);
@@ -209,7 +218,6 @@ static void dump_mce_event(struct trace_seq *s, struct mce_event *e)
        trace_seq_printf(s, ", apicid= %d ", e->apicid);
        trace_seq_printf(s, ", socketid= %d ", e->socketid);
        trace_seq_printf(s, ", cs= %d ", e->cs);
-       trace_seq_printf(s, ", bank= %d ", e->bank);
        trace_seq_printf(s, ", cpuvendor= %d", e->cpuvendor);
 }
 
@@ -297,15 +305,14 @@ int ras_mce_event_handler(struct trace_seq *s,
                return -1;
        e.cpuvendor = val;
 
-       /*
-        * Default handler is to just output whatever is there.
-        *
-        * Latter patches will add parsing capabilities to the MCE events,
-        * in order to make them understandable by the end user, falling
-        * back to the simple dump if, for some reason, the parser is not
-        * able to properly decode it.
-        */
-       dump_mce_event(s, &e);
+       /*
+        * Dump the event with the decoder that matches the CPU type.
+        * Each case must end with a break: otherwise a generic CPU event
+        * would also go through the K8 and Intel decoders, and a K8 event
+        * through the Intel one, duplicating the output.
+        */
+       switch (mce->cputype) {
+       case CPU_GENERIC:
+               dump_mce_event(s, &e);
+               break;
+       case CPU_K8:
+               dump_amd_k8_event(ras, s, &e);
+               break;
+       default:                        /* All other CPU types are Intel */
+               dump_intel_event(ras, s, &e);
+               break;
+       }
 
        return 0;
 }
index a39e6760b9016b81901300602039e27a337b4b5d..0f5cd1ea51fe2d2de3d1618bb3079780ae995b48 100644 (file)
@@ -24,6 +24,9 @@
 #include "ras-events.h"
 #include "libtrace/event-parse.h"
 
+
+/* Number of elements of array x; x must be a real array, not a pointer */
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(*(x)))
+
 enum cputype {
        CPU_GENERIC,
        CPU_P6OLD,
@@ -74,4 +77,14 @@ int ras_mce_event_handler(struct trace_seq *s,
                          struct pevent_record *record,
                          struct event_format *event, void *context);
 
+/* Software defined banks */
+#define MCE_EXTENDED_BANK      128
+
+/* These functions are defined in the per-CPU-vendor C files */
+void dump_intel_event(struct ras_events *ras,
+                     struct trace_seq *s, struct mce_event *e);
+
+void dump_amd_k8_event(struct ras_events *ras,
+                      struct trace_seq *s, struct mce_event *e);
+
 #endif