www.infradead.org Git - users/mchehab/rasdaemon.git/commitdiff
mce-intel: add support to decode thermal bank and mcg
author Mauro Carvalho Chehab <mchehab@redhat.com>
Thu, 16 May 2013 14:54:13 +0000 (11:54 -0300)
committer Mauro Carvalho Chehab <mchehab@redhat.com>
Thu, 16 May 2013 14:54:13 +0000 (11:54 -0300)
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
mce-intel.c
ras-mce-handler.c
ras-mce-handler.h

index 39b8d7251bd2b502de13b6abd2c134bfd38e2f1a..c5ab42ca92e73cb2024cff335d57e329b46e06d9 100644 (file)
 */
 
 #include <string.h>
+#include <stdio.h>
 
 #include "ras-mce-handler.h"
 
 #define MCE_THERMAL_BANK       (MCE_EXTENDED_BANK + 0)
 #define MCE_TIMEOUT_BANK        (MCE_EXTENDED_BANK + 90)
 
+/*
+ * decode_termal_bank - describe a thermal-bank event in e->error_msg.
+ *
+ * Bit 0 of e->status selects between the "heated above trip temperature"
+ * message and the "below trip temperature" one; both embed e->cpu.
+ * The function returns nothing, so it is declared void explicitly
+ * (an omitted return type is not valid C99).
+ */
+static void decode_termal_bank(struct mce_event *e)
+{
+       /* snprintf() bounds the write to the size of the destination array. */
+       if (e->status & 1)
+               snprintf(e->error_msg, sizeof(e->error_msg), "Processor %d heated above trip temperature. Throttling enabled. Please check your system cooling. Performance will be impacted", e->cpu);
+       else
+               snprintf(e->error_msg, sizeof(e->error_msg), "Processor %d below trip temperature. Throttling disabled", e->cpu);
+}
+
+/*
+ * decode_mcg - render e->mcgstatus as text into e->mcgstatus_msg.
+ *
+ * The message starts with the numeric value of e->mcgstatus and is
+ * followed by the name of each of the RIPV, EIPV and MCIP flags that
+ * is set in it.
+ */
+static void decode_mcg(struct mce_event *e)
+{
+       uint64_t mcgstatus = e->mcgstatus;
+
+       /*
+        * Format everything in a single bounded call so the numeric prefix
+        * is not overwritten by the flag names that follow it. The cast
+        * makes the argument match the %llu conversion.
+        */
+       snprintf(e->mcgstatus_msg, sizeof(e->mcgstatus_msg),
+                "mcgstatus= %llu %s%s%s",
+                (unsigned long long)mcgstatus,
+                (mcgstatus & MCG_STATUS_RIPV) ? " RIPV" : "",
+                (mcgstatus & MCG_STATUS_EIPV) ? " EIPV" : "",
+                (mcgstatus & MCG_STATUS_MCIP) ? " MCIP" : "");
+}
+
 static void bank_name(struct mce_event *e)
 {
        char *buf = e->bank_name;
@@ -46,6 +80,12 @@ int parse_intel_event(struct ras_events *ras, struct mce_event *e)
 {
        bank_name(e);
 
+       if (e->bank == MCE_THERMAL_BANK) {
+               decode_termal_bank(e);
+               return 0;
+       }
+       decode_mcg(e);
+
        return 0;
 }
 
index a5df82e7ab2f82bf2b550e4d97ae917ef84fe29d..5b270345718f8326447f7371edefa19bf3919e1a 100644 (file)
@@ -268,9 +268,9 @@ static void report_mce_event(struct ras_events *ras,
        else
                trace_seq_printf(s, "bank=%x", e->bank);
 
-       trace_seq_printf(s, ", status= %d ", e->status);
+       trace_seq_printf(s, ", status= %d", e->status);
        if (*e->error_msg)
-               trace_seq_printf(s, ", %s ", e->error_msg);
+               trace_seq_printf(s, ", %s", e->error_msg);
 
 #if 0
        /*
@@ -278,42 +278,45 @@ static void report_mce_event(struct ras_events *ras,
         * decode/print it, if we already got the uptime from the
         * tracing event? Let's just discard it for now.
         */
-       trace_seq_printf(s, ", tsc= %d ", e->tsc);
-       trace_seq_printf(s, ", walltime= %d ", e->walltime);
+       trace_seq_printf(s, ", tsc= %d", e->tsc);
+       trace_seq_printf(s, ", walltime= %d", e->walltime);
 #endif
 
-       trace_seq_printf(s, "CPU: %s, ", cputype_name[mce->cputype]);
-       trace_seq_printf(s, ", cpu= %d ", e->cpu);
-       trace_seq_printf(s, ", socketid= %d ", e->socketid);
+       trace_seq_printf(s, ", CPU: %s,", cputype_name[mce->cputype]);
+       trace_seq_printf(s, ", cpu= %d", e->cpu);
+       trace_seq_printf(s, ", socketid= %d", e->socketid);
 
 #if 0
        /*
         * The CPU vendor is already reported from mce->cputype
         */
        trace_seq_printf(s, ", cpuvendor= %d", e->cpuvendor);
-       trace_seq_printf(s, ", cpuid= %d ", e->cpuid);
+       trace_seq_printf(s, ", cpuid= %d", e->cpuid);
 #endif
 
        if (e->ip)
-               trace_seq_printf(s, ", ip= %d%s ",
+               trace_seq_printf(s, ", ip= %d%s",
                                 !(e->mcgstatus & MCG_STATUS_EIPV) ? " (INEXACT)" : "",
                                 e->ip);
 
        if (e->cs)
-               trace_seq_printf(s, ", cs= %d ", e->cs);
+               trace_seq_printf(s, ", cs= %d", e->cs);
 
        if (e->status & MCI_STATUS_MISCV)
-               trace_seq_printf(s, ", misc= %d ", e->misc);
+               trace_seq_printf(s, ", misc= %d", e->misc);
 
        if (e->status & MCI_STATUS_ADDRV)
-               trace_seq_printf(s, ", addr= %d ", e->addr);
+               trace_seq_printf(s, ", addr= %d", e->addr);
 
-       trace_seq_printf(s, ", mcgstatus= %d ", e->mcgstatus);
+       if (e->mcgstatus_msg)
+               trace_seq_printf(s, ", %s", e->mcgstatus_msg);
+       else
+               trace_seq_printf(s, ", mcgstatus= %d", e->mcgstatus);
 
        if (e->mcgcap)
-               trace_seq_printf(s, ", mcgcap= %d ", e->mcgcap);
+               trace_seq_printf(s, ", mcgcap= %d", e->mcgcap);
 
-       trace_seq_printf(s, ", apicid= %d ", e->apicid);
+       trace_seq_printf(s, ", apicid= %d", e->apicid);
 
        /*
         * FIXME: The original mcelog userspace tool uses DMI to map from
index 9267681b7ed6471cb352b8fe0766d6fb22e78a88..e28bc9c3c43f4140aab2d63e866ab3d633b72110 100644 (file)
@@ -66,6 +66,7 @@ struct mce_event {
        char            timestamp[64];
        char            bank_name[64];
        char            error_msg[4096];
+       char            mcgstatus_msg[256];
 };
 
 struct mce_priv {