rasdaemon_SOURCES += ras-aer-handler.c
endif
if WITH_MCE
- rasdaemon_SOURCES += ras-mce-handler.c mce-intel.c mce-amd-k8.c
+ rasdaemon_SOURCES += ras-mce-handler.c mce-intel.c mce-amd-k8.c \
+ bitfield.c mce-intel-core2.c
endif
rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
-include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h ras-record.h
+include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \
+ ras-record.h bitfield.h
--- /dev/null
+/*
+ * Copyright (C) 2013 Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * The code below was adapted from Andi Kleen/Intel/SuSe mcelog code,
+ * released under the GNU General Public License, v.2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+
+#include "ras-mce-handler.h"
+#include "bitfield.h"
+
+char *reserved_3bits[8];	/* one slot per value of a 3-bit field; no initializer, so every entry is NULL */
+char *reserved_1bit[2];	/* one slot per value of a 1-bit field; every entry is NULL */
+char *reserved_2bits[4];	/* one slot per value of a 2-bit field; every entry is NULL */
+
+static uint64_t bitmask(uint64_t i)	/* returns the smallest all-ones value (1, 3, 7, 15, ...) that is >= i */
+{
+ uint64_t mask = 1;	/* never narrower than one bit, even for i == 0 */
+ while (mask < i)
+ mask = (mask << 1) | 1;	/* widen by one bit, keeping every lower bit set */
+ return mask;
+}
+
+/*
+ * decode_bitfield - describe the enumerated fields of a status word
+ * @e:      event whose error_msg receives the decoded text
+ * @status: raw status value to decode
+ * @fields: table of fields, terminated by an entry whose str is NULL
+ *
+ * For each field, the value is extracted by shifting status right by
+ * start_bit and masking it to the width needed to index stringlen entries.
+ * A value that has a name in the table is reported by that name. A non-zero
+ * value without a name is reported raw as "<start_bit:value>" (hex). A zero
+ * value without a name is silently skipped.
+ */
+void decode_bitfield(struct mce_event *e, uint64_t status,
+		     struct field *fields)
+{
+	struct field *f;
+
+	for (f = fields; f->str; f++) {
+		uint64_t v = (status >> f->start_bit) & bitmask(f->stringlen - 1);
+		char *s = NULL;
+
+		/* the mask can be wider than the table, so bound the index */
+		if (v < f->stringlen)
+			s = f->str[v];
+
+		if (s) {
+			/* named value: this is the actual decoded message */
+			mce_snprintf(e->error_msg, "%s", s);
+		} else if (v != 0) {
+			/* unnamed but set: keep the raw bits visible */
+			mce_snprintf(e->error_msg, "<%u:%llx>",
+				     f->start_bit, (unsigned long long)v);
+		}
+	}
+}
+
+/*
+ * decode_numfield - report the numeric fields of a status word
+ * @e:      event whose error_msg receives the decoded text
+ * @status: raw status value to decode
+ * @fields: table of fields, terminated by an entry whose name is NULL
+ *
+ * Each field covers bits start..end (inclusive) of status. A field is
+ * reported as "name: value" when its value is non-zero or when its force
+ * flag is set. The value is rendered with the field's own fmt, falling back
+ * to "%Lu" when fmt is NULL.
+ */
+void decode_numfield(struct mce_event *e, uint64_t status,
+		     struct numfield *fields)
+{
+	struct numfield *f;
+
+	for (f = fields; f->name; f++) {
+		unsigned width = f->end - f->start + 1;
+		/* shifting 1ULL by 64 is undefined, so handle a full-width field */
+		uint64_t mask = width >= 64 ? ~0ULL : (1ULL << width) - 1;
+		uint64_t v = (status >> f->start) & mask;
+
+		if (v > 0 || f->force) {
+			/* large enough for any 64-bit value in decimal or hex */
+			char value[32];
+
+			/*
+			 * Render the number with the per-field conversion
+			 * first, so the label and the value both reach the
+			 * message instead of the format text itself.
+			 */
+			snprintf(value, sizeof(value), f->fmt ? f->fmt : "%Lu",
+				 (unsigned long long)v);
+			mce_snprintf(e->error_msg, "%s: %s\n", f->name, value);
+		}
+	}
+}
--- /dev/null
+/*
+ * The code below came from Andi Kleen/Intel/SuSe mcelog code,
+ * released under the GNU General Public License, v.2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+/* Generic bitfield decoder */
+
+struct field {	/* one enumerated field, as consumed by decode_bitfield() */
+ unsigned start_bit;	/* right-shift applied to the status word to reach the field */
+ char **str;	/* names indexed by field value; a NULL str ends the table walk in decode_bitfield() */
+ unsigned stringlen;	/* number of entries in str; also determines the mask width */
+};
+
+struct numfield {	/* one numeric field, as consumed by decode_numfield() */
+ unsigned start, end;	/* first and last bit of the field; the width is end - start + 1 */
+ char *name;	/* label for the field; a NULL name ends the table walk in decode_numfield() */
+ char *fmt;	/* printf-style conversion for the value; decode_numfield() falls back to "%Lu" when NULL */
+ int force;	/* non-zero: handle the field even when its value is 0 */
+};
+
+#define FIELD(start_bit, name) { start_bit, name, ARRAY_SIZE(name) }	/* struct field backed by the array `name`; ARRAY_SIZE is not defined in this header */
+#define SBITFIELD(start_bit, string) { start_bit, ((char * [2]) { NULL, string }), 2 }	/* single flag bit: value 0 has no name, value 1 maps to `string` */
+
+#define NUMBER(start, end, name) { start, end, name, "%Lu", 0 }	/* struct numfield: decimal, not forced */
+#define NUMBERFORCE(start, end, name) { start, end, name, "%Lu", 1 }	/* struct numfield: decimal, forced even when 0 */
+#define HEXNUMBER(start, end, name) { start, end, name, "%Lx", 0 }	/* struct numfield: hexadecimal, not forced */
+#define HEXNUMBERFORCE(start, end, name) { start, end, name, "%Lx", 1 }	/* struct numfield: hexadecimal, forced even when 0 */
+
+void decode_bitfield(struct mce_event *e, uint64_t status,
+ struct field *fields);	/* walks `fields` until an entry whose str is NULL */
+void decode_numfield(struct mce_event *e, uint64_t status,
+ struct numfield *fields);	/* walks `fields` until an entry whose name is NULL */
+
+#define MASK(x) ((1ULL << (1 + (x))) - 1)	/* low (x + 1) bits set; with a 64-bit unsigned long long, x must stay below 63 or the shift is undefined */
+#define EXTRACT(v, a, b) (((v) >> (a)) & MASK((b)-(a)))	/* bits a..b (inclusive) of v, moved down to bit 0 */
+
+static inline int test_prefix(int nr, uint32_t value)	/* returns 1 when bit nr is the highest set bit of value, else 0 */
+{
+ return ((value >> nr) == 1);	/* bit nr must be set and every bit above it clear */
+}
--- /dev/null
+/*
+ * The code below came from Andi Kleen/Intel/SuSe mcelog code,
+ * released under the GNU General Public License, v.2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+
+#include "ras-mce-handler.h"
+#include "bitfield.h"
+
+/* Decode P6 family (Core2) model specific errors.
+ The generic errors are decoded in p4.c */
+
+/*
+ * [19..24]
+ * Bus queue request type names, indexed by field value. Indices with no
+ * initializer (1, 7, 9, 17, 19, 21..23, 26, 27) are NULL. The highest
+ * designated index is 30, so the array has 31 entries.
+ * NOTE(review): when used through FIELD(19, ...), decode_bitfield() masks
+ * the value with bitmask(31 - 1) == 0x1f, i.e. five bits (19..23) rather
+ * than the six bits named above - confirm which width is intended.
+ */
+static char *bus_queue_req_type[] = {
+ [0] = "BQ_DCU_READ_TYPE",
+ [2] = "BQ_IFU_DEMAND_TYPE",
+ [3] = "BQ_IFU_DEMAND_NC_TYPE",
+ [4] = "BQ_DCU_RFO_TYPE",
+ [5] = "BQ_DCU_RFO_LOCK_TYPE",
+ [6] = "BQ_DCU_ITOM_TYPE",
+ [8] = "BQ_DCU_WB_TYPE",
+ [10] = "BC_DCU_WCEVICT_TYPE",
+ [11] = "BQ_DCU_WCLINE_TYPE",
+ [12] = "BQ_DCU_BTM_TYPE",
+ [13] = "BQ_DCU_INTACK_TYPE",
+ [14] = "BQ_DCU_INVALL2_TYPE",
+ [15] = "BQ_DCU_FLUSHL2_TYPE",
+ [16] = "BQ_DCU_PART_RD_TYPE",
+ [18] = "BQ_DCU_PART_WR_TYPE",
+ [20] = "BQ_DCU_SPEC_CYC_TYPE",
+ [24] = "BQ_DCU_IO_RD_TYPE",
+ [25] = "BQ_DCU_IO_WR_TYPE",
+ [28] = "BQ_DCU_LOCK_RD_TYPE",
+ [30] = "BQ_DCU_SPLOCK_RD_TYPE",
+ [29] = "BQ_DCU_LOCK_WR_TYPE",
+};
+
+/*
+ * [25..27]
+ * Bus queue error type names, indexed by field value. Index 3 has no
+ * initializer and is therefore NULL. The array has 6 entries, which
+ * decode_bitfield() masks with bitmask(5) == 0x7, i.e. three bits.
+ */
+static char *bus_queue_error_type[] = {
+ [0] = "BQ_ERR_HARD_TYPE",
+ [1] = "BQ_ERR_DOUBLE_TYPE",
+ [2] = "BQ_ERR_AERR2_TYPE",
+ [4] = "BQ_ERR_SINGLE_TYPE",
+ [5] = "BQ_ERR_AERR1_TYPE",
+};
+
+/*
+ * Placeholder tables for reserved bit ranges, defined in bitfield.c. Every
+ * entry is NULL, so decode_bitfield() stays silent while the bits are zero
+ * and reports them raw as "<start_bit:value>" when any of them is set.
+ *
+ * They are used instead of FIELD(n, NULL): a NULL str is the end-of-table
+ * marker tested by decode_bitfield(), so a NULL-backed entry silently cuts
+ * the table short at that position.
+ */
+extern char *reserved_1bit[2];
+extern char *reserved_2bits[4];
+extern char *reserved_3bits[8];
+
+/* Status fields decoded by both core2_decode_model() and p6old_decode_model() */
+static struct field p6_shared_status[] = {
+	FIELD(16, reserved_3bits),
+	FIELD(19, bus_queue_req_type),
+	/* listed once: a second identical entry could only repeat the output */
+	FIELD(25, bus_queue_error_type),
+	SBITFIELD(30, "internal BINIT"),
+	SBITFIELD(36, "received parity error on response transaction"),
+	SBITFIELD(38, "timeout BINIT (ROB timeout)."
+		  " No micro-instruction retired for some time"),
+	FIELD(39, reserved_3bits),
+	SBITFIELD(42, "bus transaction received hard error response"),
+	SBITFIELD(43, "failure that caused IERR"),
+	/* The following are reserved for Core in the SDM. Let's keep them here anyways*/
+	SBITFIELD(44, "two failing bus transactions with address parity error (AERR)"),
+	SBITFIELD(45, "uncorrectable ECC error"),
+	SBITFIELD(46, "correctable ECC error"),
+	/* [47..54]: ECC syndrome */
+	FIELD(55, reserved_2bits),
+	{},
+};
+
+/* Status fields decoded only by p6old_decode_model() */
+static struct field p6old_status[] = {
+	SBITFIELD(28, "FRC error"),
+	SBITFIELD(29, "BERR on this CPU"),
+	FIELD(31, reserved_1bit),
+	FIELD(32, reserved_3bits),
+	SBITFIELD(35, "BINIT received from external bus"),
+	SBITFIELD(37, "Received hard error reponse on split transaction (Bus BINIT)"),
+	{}
+};
+
+/* Status fields decoded only by core2_decode_model() */
+static struct field core2_status[] = {
+	SBITFIELD(28, "MCE driven"),
+	SBITFIELD(29, "MCE is observed"),
+	SBITFIELD(31, "BINIT observed"),
+	FIELD(32, reserved_2bits),
+	SBITFIELD(34, "PIC or FSB data parity error"),
+	FIELD(35, reserved_1bit),
+	SBITFIELD(37, "FSB address parity error detected"),
+	{}
+};
+
+static struct numfield p6old_status_numbers[] = {	/* numeric fields passed to decode_numfield() by both decoders in this file */
+ HEXNUMBER(47, 54, "ECC syndrome"),	/* bits 47..54, fmt "%Lx", force == 0 */
+ {}	/* NULL name: end of table */
+};
+
+void core2_decode_model(struct mce_event *e)	/* decodes e->status with the shared table, the Core2 table and the numeric table */
+{
+ uint64_t status = e->status;
+
+ decode_bitfield(e, status, p6_shared_status);	/* fields also used by p6old_decode_model() */
+ decode_bitfield(e, status, core2_status);	/* fields used only by this decoder */
+ /* Normally reserved, but let's parse anyways: */
+ decode_numfield(e, status, p6old_status_numbers);	/* ECC syndrome, bits 47..54 */
+}
+
+void p6old_decode_model(struct mce_event *e)	/* decodes e->status with the shared table, the p6old table and the numeric table */
+{
+ uint64_t status = e->status;
+
+ decode_bitfield(e, status, p6_shared_status);	/* fields also used by core2_decode_model() */
+ decode_bitfield(e, status, p6old_status);	/* fields used only by this decoder */
+ decode_numfield(e, status, p6old_status_numbers);	/* ECC syndrome, bits 47..54 */
+}
#include <stdio.h>
#include "ras-mce-handler.h"
+#include "bitfield.h"
#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
#define MCE_TIMEOUT_BANK (MCE_EXTENDED_BANK + 90)
__str; \
})
-static int test_prefix(int nr, uint32_t value)
-{
- return ((value >> nr) == 1);
-}
-
static void decode_mca(struct mce_event *e, uint64_t track, int *ismemerr)
{
uint32_t mca = e->status & 0xffffL;
int parse_intel_event(struct ras_events *ras, struct mce_event *e)
{
+ struct mce_priv *mce = ras->mce_priv;	/* supplies the cputype that selects a model-specific decoder below */
int ismemerr;
bank_name(e);
decode_mcg(e);
decode_mci(e, &ismemerr);
- /* FIXME: add per-CPU-type specific handlers */
+ if (test_prefix(11, (e->status & 0xffffL))) {	/* only when bit 11 is the highest set bit of the low 16 status bits */
+ switch(mce->cputype) {	/* no default label: any other cputype gets no model-specific decode here */
+ case CPU_P6OLD:
+ p6old_decode_model(e);
+ break;
+ case CPU_DUNNINGTON:
+ case CPU_CORE2:
+ core2_decode_model(e);
+ break;
+#if 0	/* compiled out; p4_decode_model() is not declared in the visible part of this patch */
+ case CPU_TULSA:
+ case CPU_P4:
+ p4_decode_model(e);
+ break;
+ case CPU_NEHALEM:
+ case CPU_XEON75XX:
+ core2_decode_model(e);
+ break;
+#endif
+ }
+ }
+#if 0	/* compiled out; none of the decoders named below are declared in the visible part of this patch */
+ switch(mce->cputype) {
+ case CPU_NEHALEM:
+ nehalem_decode_model(e);
+ break;
+ case CPU_DUNNINGTON:
+ dunnington_decode_model(e);
+ break;
+ case CPU_TULSA:
+ tulsa_decode_model(e);
+ break;
+ case CPU_XEON75XX:
+ xeon75xx_decode_model(e);
+ break;
+ case CPU_SANDY_BRIDGE:
+ case CPU_SANDY_BRIDGE_EP:
+ snb_decode_model(ras, e);
+ break;
+ case CPU_IVY_BRIDGE_EPEX:
+ ivb_decode_model(ras, e);
+ break;
+ }
+#endif
return 0;
}
unsigned bit_offset, unsigned ignore_bits,
uint64_t status);
+/*
+ * Per-CPU-type decoders for Intel CPUs
+ * parse_intel_event() calls p6old_decode_model() for CPU_P6OLD and
+ * core2_decode_model() for CPU_DUNNINGTON and CPU_CORE2.
+ */
+void core2_decode_model(struct mce_event *e);
+void p6old_decode_model(struct mce_event *e);
+
+
/* Software defined banks */
#define MCE_EXTENDED_BANK 128