#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
#define MCE_TIMEOUT_BANK (MCE_EXTENDED_BANK + 90)
+#define TLB_LL_MASK 0x3 /*bit 0, bit 1*/
+#define TLB_LL_SHIFT 0x0
+#define TLB_TT_MASK 0xc /*bit 2, bit 3*/
+#define TLB_TT_SHIFT 0x2
+
+#define CACHE_LL_MASK 0x3 /*bit 0, bit 1*/
+#define CACHE_LL_SHIFT 0x0
+#define CACHE_TT_MASK 0xc /*bit 2, bit 3*/
+#define CACHE_TT_SHIFT 0x2
+#define CACHE_RRRR_MASK 0xF0 /*bit 4, bit 5, bit 6, bit 7 */
+#define CACHE_RRRR_SHIFT 0x4
+
+#define BUS_LL_MASK 0x3 /* bit 0, bit 1*/
+#define BUS_LL_SHIFT 0x0
+#define BUS_II_MASK 0xc /*bit 2, bit 3*/
+#define BUS_II_SHIFT 0x2
+#define BUS_RRRR_MASK 0xF0 /*bit 4, bit 5, bit 6, bit 7 */
+#define BUS_RRRR_SHIFT 0x4
+#define BUS_T_MASK 0x100 /*bit 8*/
+#define BUS_T_SHIFT 0x8
+#define BUS_PP_MASK 0x600 /*bit 9, bit 10*/
+#define BUS_PP_SHIFT 0x9
+
+#define MCG_TES_P (1ULL<<11) /* Yellow bit cache threshold supported */
+
+
+static char *TT[] = {
+ "Instruction",
+ "Data",
+ "Generic",
+ "Unknown"
+};
+
+static char *LL[] = {
+ "Level-0",
+ "Level-1",
+ "Level-2",
+ "Level-3"
+};
+
+static struct {
+ uint8_t value;
+ char* str;
+} RRRR [] = {
+ {0, "Generic"},
+ {1, "Read"},
+ {2, "Write" },
+ {3, "Data-Read"},
+ {4, "Data-Write"},
+ {5, "Instruction-Fetch"},
+ {6, "Prefetch"},
+ {7, "Eviction"},
+ {8, "Snoop"}
+};
+
+static char *PP[] = {
+ "Local-CPU-originated-request",
+ "Responed-to-request",
+ "Observed-error-as-third-party",
+ "Generic"
+};
+
+static char *T[] = {
+ "Request-did-not-timeout",
+ "Request-timed-out"
+};
+
+static char *II[] = {
+ "Memory-access",
+ "Reserved",
+ "IO",
+ "Other-transaction"
+};
+
+static char *mca_msg[] = {
+ [0] = "No Error",
+ [1] = "Unclassified",
+ [2] = "Microcode ROM parity error",
+ [3] = "External error",
+ [4] = "FRC error",
+ [5] = "Internal parity error",
+};
+
+static char *tracking_msg[] = {
+ [1] = "green",
+ [2] = "yellow",
+ [3] ="res3"
+};
+
+static const char *arstate[4] = {
+ [0] = "UCNA",
+ [1] = "AR",
+ [2] = "SRAO",
+ [3] = "SRAR"
+};
+
+static char *mmm_mnemonic[] = {
+ "GEN", "RD", "WR", "AC", "MS", "RES5", "RES6", "RES7"
+};
+
+static char *mmm_desc[] = {
+ "Generic undefined request",
+ "Memory read error",
+ "Memory write error",
+ "Address/Command error",
+ "Memory scrubbing error",
+ "Reserved 5",
+ "Reserved 6",
+ "Reserved 7"
+};
+
+void decode_memory_controller(struct mce_event *e, uint32_t status)
+{
+ char channel[30];
+ if ((status & 0xf) == 0xf)
+ mce_snprintf(e->mc_channel, "unspecified");
+ else
+ mce_snprintf(e->mc_channel, "%u", status & 0xf);
+ mce_snprintf(e->error_msg, "MEMORY CONTROLLER %s_CHANNEL%s_ERR\n",
+ mmm_mnemonic[(status >> 4) & 7],
+ channel);
+ mce_snprintf(e->error_msg, "Transaction: %s\n",
+ mmm_desc[(status >> 4) & 7]);
+}
+
static decode_termal_bank(struct mce_event *e)
{
if (e->status & 1) {
mce_snprintf(e->mcgstatus_msg, "Processor %d heated above trip temperature. Throttling enabled.", e->cpu);
mce_snprintf(e->user_action, "Please check your system cooling. Performance will be impacted");
} else {
- sprintf(e->error_msg, "Processor %d below trip temperature. Throttling disabled", e->cpu);
+ mce_snprintf(e->error_msg, "Processor %d below trip temperature. Throttling disabled", e->cpu);
}
}
}
}
+static char *get_RRRR_str(uint8_t rrrr)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(RRRR); i++) {
+ if (RRRR[i].value == rrrr) {
+ return RRRR[i].str;
+ }
+ }
+
+ return "UNKNOWN";
+}
+
+#define decode_attr(arr, val) ({ \
+ char *__str; \
+ if ((unsigned)(val) >= ARRAY_SIZE(arr)) \
+ __str = "UNKNOWN"; \
+ else \
+ __str = (arr)[val]; \
+ __str; \
+})
+
+static int test_prefix(int nr, uint32_t value)
+{
+ return ((value >> nr) == 1);
+}
+
+static void decode_mca(struct mce_event *e, uint64_t track, int *ismemerr)
+{
+ uint32_t mca = e->status & 0xffffL;
+
+ if (mca & (1UL << 12)) {
+ mce_snprintf(e->mcastatus_msg,
+ "corrected filtering (some unreported errors in same region)");
+ mca &= ~(1UL << 12);
+ }
+
+ if (mca < ARRAY_SIZE(mca_msg)) {
+ mce_snprintf(e->mcastatus_msg, "%s", mca_msg[mca]);
+ return;
+ }
+
+ if ((mca >> 2) == 3) {
+ mce_snprintf(e->mcastatus_msg,
+ "%s Generic memory hierarchy error\n",
+ decode_attr(LL, mca & 3));
+ } else if (test_prefix(4, mca)) {
+ mce_snprintf(e->mcastatus_msg, "%s TLB %s Error\n",
+ decode_attr(TT, (mca & TLB_TT_MASK) >> TLB_TT_SHIFT),
+ decode_attr(LL, (mca & TLB_LL_MASK) >> TLB_LL_SHIFT));
+ } else if (test_prefix(8, mca)) {
+ unsigned typenum = (mca & CACHE_TT_MASK) >> CACHE_TT_SHIFT;
+ unsigned levelnum = (mca & CACHE_LL_MASK) >> CACHE_LL_SHIFT;
+ char *type = decode_attr(TT, typenum);
+ char *level = decode_attr(LL, levelnum);
+ mce_snprintf(e->mcastatus_msg,
+ "%s CACHE %s %s Error\n", type, level,
+ get_RRRR_str((mca & CACHE_RRRR_MASK) >>
+ CACHE_RRRR_SHIFT));
+#if 0
+ /* FIXME: We shouldn't mix parsing with actions */
+ if (track == 2)
+ run_yellow_trigger(e->cpu, typenum, levelnum, type, level, e->socket);
+#endif
+ } else if (test_prefix(10, mca)) {
+ if (mca == 0x400)
+ mce_snprintf(e->mcastatus_msg,
+ "Internal Timer error\n");
+ else
+ mce_snprintf(e->mcastatus_msg,
+ "Internal unclassified error: %x\n",
+ mca);
+ } else if (test_prefix(11, mca)) {
+ mce_snprintf(e->mcastatus_msg, "BUS %s %s %s %s %s Error\n",
+ decode_attr(LL, (mca & BUS_LL_MASK) >> BUS_LL_SHIFT),
+ decode_attr(PP, (mca & BUS_PP_MASK) >> BUS_PP_SHIFT),
+ get_RRRR_str((mca & BUS_RRRR_MASK) >> BUS_RRRR_SHIFT),
+ decode_attr(II, (mca & BUS_II_MASK) >> BUS_II_SHIFT),
+ decode_attr(T, (mca & BUS_T_MASK) >> BUS_T_SHIFT));
+ } else if (test_prefix(7, mca)) {
+ decode_memory_controller(e, mca);
+ *ismemerr = 1;
+ } else
+ mce_snprintf(e->mcastatus_msg, "Unknown Error %x\n", mca);
+}
+
+static void decode_tracking(struct mce_event *e, uint64_t track)
+{
+ if (track == 1)
+ mce_snprintf(e->user_action,
+ "Large number of corrected cache errors. System operating, but might leadto uncorrected errors soon");
+
+ if (track)
+ mce_snprintf(e->mcistatus_msg, "Threshold based error status: %s",
+ tracking_msg[track]);
+}
+
+static void decode_mci(struct mce_event *e, int *ismemerr)
+{
+ uint64_t track = 0;
+
+ if (!(e->status & MCI_STATUS_VAL))
+ mce_snprintf(e->mcistatus_msg, "MCE_INVALID");
+
+ if (e->status & MCI_STATUS_OVER)
+ mce_snprintf(e->mcistatus_msg, "Error_overflow");
+
+ /* FIXME: convert into severity */
+ if (e->status & MCI_STATUS_UC)
+ mce_snprintf(e->mcistatus_msg, "Uncorrected_error");
+ else
+ mce_snprintf(e->mcistatus_msg, "Corrected_error");
+
+
+ if (e->status & MCI_STATUS_EN)
+ mce_snprintf(e->mcistatus_msg, "Error_enabled");
+
+
+ if (e->status & MCI_STATUS_PCC)
+ mce_snprintf(e->mcistatus_msg, "Processor_context_corrupt");
+
+ if (e->status & (MCI_STATUS_S|MCI_STATUS_AR))
+ mce_snprintf(e->mcistatus_msg, "%s\n",
+ arstate[(e->status >> 55) & 3]);
+
+ if ((e->mcgcap == 0 || (e->mcgcap & MCG_TES_P)) &&
+ !(e->status & MCI_STATUS_UC)) {
+ track = (e->status >> 53) & 3;
+ decode_tracking(e, track);
+ }
+
+ decode_mca(e, track, ismemerr);
+}
+
int parse_intel_event(struct ras_events *ras, struct mce_event *e)
{
+ int ismemerr;
+
bank_name(e);
if (e->bank == MCE_THERMAL_BANK) {
return 0;
}
decode_mcg(e);
+ decode_mci(e, &ismemerr);
+
+ /* FIXME: add per-CPU-type specific handlers */
return 0;
}
-